test=develop

6 years ago · c1fa2ec360
parent c83d5b7a16 674aed6a6c
commit c1fa2ec360
3204 changed files with 139937 additions and 253967 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -42,12 +42,6 @@ repos:
        entry: bash ./tools/codestyle/pylint_pre_commit.hook
        language: system
        files: \.(py)$
-   repo: https://github.com/PaddlePaddle/pre-commit-golang
-    sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
-    hooks:
-    -   id: go-fmt
-        types:
-        - go
 -   repo: local
    hooks:
    -   id: copyright_checker
--- a/.travis.yml
+++ b/.travis.yml
@ -4,7 +4,6 @@ cache:
    - $HOME/.ccache
    - $HOME/.cache/pip
    - $TRAVIS_BUILD_DIR/build/third_party
-    - $TRAVIS_BUILD_DIR/build_android/third_party
 sudo: required
 dist: trusty
 services:
@ -13,7 +12,6 @@ os:
  - linux
 env:
  - JOB=check_style
-  - JOB=build_android
 addons:
  ssh_known_hosts: 13.229.163.131
 before_install:
--- a/AUTHORS.md
+++ b/AUTHORS.md
@ -44,6 +44,7 @@
 | qingqing01 | Qing-Qing Dang |
 | reyoung | Yang Yu |
 | Sand3r- | Michal Gallus |
+| sfraczek | Sylwester Fraczek |
 | Superjom | Chun-Wei Yan |
 | tensor-tang | Jian Tang |
 | tianbingsz | Tian-Bing Xu |
@ -54,6 +55,7 @@
 | wangyang59 | Yang Wang |
 | wangzhen-nlp | Zhen Wang |
 | wen-bo-yang | Wen-Bo Yang |
+| wojtuss | Wojciech Uss |
 | wwhu | Wei-Wei Hu |
 | xinghai-sun | Xing-Hai Sun |
 | Xreki | Yi-Qun Liu |
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -128,7 +128,7 @@ Please install pre-commit, which automatically reformat the changes to C/C++ and

 Please remember to add related unit tests.

- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/Primer.md).
+- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/primer.md) .

 - For Python code, please use [Python's standard `unittest` package](http://pythontesting.net/framework/unittest/unittest-introduction/).

@ -156,7 +156,7 @@ python \

 This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the verbose range of 0 to 3, so you will see above example VLOG message, which is in level 3.  This suggests that we output overall messages in lower verbose levels, so they display with higher probability.  When coding C++, please follow the verbose level convention as follows:

- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/math)
+- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/framework)
+- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators)
+- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/platform)
+- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid/operators/math/)
--- a/91
+++ b/91
@ -11,12 +11,10 @@ RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ub
 # ENV variables
 ARG WITH_GPU
 ARG WITH_AVX
-ARG WITH_DOC

 ENV WOBOQ OFF
 ENV WITH_GPU=${WITH_GPU:-ON}
 ENV WITH_AVX=${WITH_AVX:-ON}
-ENV WITH_DOC=${WITH_DOC:-OFF}

 ENV HOME /root
 # Add bash enhancements
@ -77,8 +75,9 @@ RUN curl -s -q https://glide.sh/get | sh
 #    and its size is only one-third of the official one.
 # 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
 #    See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.
-RUN wget -qO- http://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
-    tar -xz -C /usr/local && \
+
+RUN wget -q https://paddlepaddledeps.cdn.bcebos.com/TensorRT-4.0.1.6-ubuntu14.04.x86_64-gnu.cuda.8.0.cudnn7.0.tar.gz --no-check-certificate && \
+    tar -zxf TensorRT-4.0.1.6-ubuntu14.04.x86_64-gnu.cuda.8.0.cudnn7.0.tar.gz -C /usr/local && \
    cp -rf /usr/local/TensorRT/include /usr && \
    cp -rf /usr/local/TensorRT/lib /usr

@ -94,52 +93,52 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
 # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
 # version(1.7.1 for now), which causes building documentation failed.
-RUN pip3 install -U wheel && \
-    pip3 install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.6 install -U wheel && \
-    pip3.6 install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3.6 install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.7 install -U wheel && \
-    pip3.7 install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3.7 install sphinx-rtd-theme==0.1.9 recommonmark && \
+RUN pip3 --no-cache-dir install -U wheel && \
+    pip3 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.6 --no-cache-dir install -U wheel && \
+    pip3.6 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.6 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.7 --no-cache-dir install -U wheel && \
+    pip3.7 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.7 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
    easy_install -U pip && \
-    pip install -U pip setuptools wheel && \
-    pip install -U docopt PyYAML sphinx==1.5.6 && \
-    pip install sphinx-rtd-theme==0.1.9 recommonmark
-
-RUN pip3 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3 install opencv-python && \
-    pip3.6 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3.6 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3.6 install opencv-python && \
-    pip3.7 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3.7 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3.7 install opencv-python && \
-    pip install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip install opencv-python
+    pip --no-cache-dir install -U pip setuptools wheel && \
+    pip --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark
+
+RUN pip3 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3 --no-cache-dir install opencv-python && \
+    pip3.6 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.6 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.6 --no-cache-dir install opencv-python && \
+    pip3.7 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.7 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.7 --no-cache-dir install opencv-python && \
+    pip --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip --no-cache-dir install opencv-python

 #For docstring checker
-RUN pip3 install pylint pytest astroid isort
-RUN pip3.6 install pylint pytest astroid isort
-RUN pip3.7 install pylint pytest astroid isort
-RUN pip install pylint pytest astroid isort LinkChecker
+RUN pip3 --no-cache-dir install pylint pytest astroid isort
+RUN pip3.6 --no-cache-dir install pylint pytest astroid isort
+RUN pip3.7 --no-cache-dir install pylint pytest astroid isort
+RUN pip --no-cache-dir install pylint pytest astroid isort LinkChecker

 COPY ./python/requirements.txt /root/
-RUN pip3 install -r /root/requirements.txt
-RUN pip3.6 install -r /root/requirements.txt
-RUN pip3.7 install -r /root/requirements.txt
-RUN pip install -r /root/requirements.txt
+RUN pip3 --no-cache-dir install -r /root/requirements.txt
+RUN pip3.6 --no-cache-dir install -r /root/requirements.txt
+RUN pip3.7 --no-cache-dir install -r /root/requirements.txt
+RUN pip --no-cache-dir install -r /root/requirements.txt

 # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
-RUN apt-get install -y libssl-dev libffi-dev
-RUN pip3 install certifi urllib3[secure]
-RUN pip3.6 install certifi urllib3[secure]
-RUN pip3.7 install certifi urllib3[secure]
-RUN pip install certifi urllib3[secure]
+RUN apt-get install -y libssl-dev libffi-dev && apt-get clean -y
+RUN pip3 --no-cache-dir install certifi urllib3[secure]
+RUN pip3.6 --no-cache-dir install certifi urllib3[secure]
+RUN pip3.7 --no-cache-dir install certifi urllib3[secure]
+RUN pip --no-cache-dir install certifi urllib3[secure]


 # Install woboq_codebrowser to /woboq
@ -149,6 +148,14 @@ RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \
           -DCMAKE_BUILD_TYPE=Release . \
     make)

+# ar mishandles 4GB files
+# https://sourceware.org/bugzilla/show_bug.cgi?id=14625
+# remove them when apt-get support 2.27 and higher version
+RUN wget -q https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/binutils/2.27-9ubuntu1/binutils_2.27.orig.tar.gz && \
+    tar -xzf binutils_2.27.orig.tar.gz && \
+    cd binutils-2.27 && \
+    ./configure && make -j && make install && cd .. && rm -rf binutils-2.27 binutils_2.27.orig.tar.gz
+
 # Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
 RUN mkdir /var/run/sshd
 RUN echo 'root:root' | chpasswd
--- a/Dockerfile.android
+++ b/Dockerfile.android
@ -1,42 +0,0 @@
-FROM ubuntu:16.04
-MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
-
-ARG UBUNTU_MIRROR
-RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
-
-# ENV variables
-ARG ANDROID_ABI
-ARG ANDROID_API
-
-ENV ANDROID_ABI=${ANDROID_ABI:-"armeabi-v7a"}
-ENV ANDROID_API=${ANDROID_API:-21}
-
-ENV HOME=/root \
-    ANDROID_NDK_HOME=/opt/android-ndk-linux \
-    ANDROID_TOOLCHAINS_DIR=/opt/toolchains
-
-RUN apt-get update && \
-    apt-get install -y \
-    git python-dev python-pip python-numpy \
-    wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
-    apt-get clean -y
-
-# git credential to skip password typing
-RUN git config --global credential.helper store
-
-# Fix locales to en_US.UTF-8
-RUN localedef -i en_US -f UTF-8 en_US.UTF-8
-
-RUN pip install --upgrade pip==9.0.3 && \
-    pip install -U 'protobuf==3.1.0' && \
-    pip install -U wheel sphinx && \
-    pip install pre-commit
-
-# Android NDK
-RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \
-    mkdir -p /opt/android-ndk-tmp && \
-    cd /opt/android-ndk-tmp && \
-    wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \
-    unzip -q android-ndk-r14b-linux-x86_64.zip && \
-    mv android-ndk-r14b ${ANDROID_NDK_HOME} && \
-    rm -rf /opt/android-ndk-tmp
--- a/README.md
+++ b/README.md
@ -1,13 +1,13 @@
 # PaddlePaddle

+English | [简体中文](./README_cn.md)

 [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.1/getstarted/index_en.html)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
 [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
 [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)

-
 Welcome to the PaddlePaddle GitHub.

 PaddlePaddle (PArallel Distributed Deep LEarning) is an easy-to-use,
@ -18,8 +18,7 @@ learning to many products at Baidu.
 Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.

-
-### Latest PaddlePaddle Release: [Fluid 1.1.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.1)
+### Latest PaddlePaddle Release: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
 ### Install Latest Stable Release:
 ```
 # Linux CPU
@ -27,9 +26,9 @@ pip install paddlepaddle
 # Linux GPU cuda9cudnn7
 pip install paddlepaddle-gpu
 # Linux GPU cuda8cudnn7
-pip install paddlepaddle-gpu==1.1.0.post87
+pip install paddlepaddle-gpu==1.3.0.post87
 # Linux GPU cuda8cudnn5
-pip install paddlepaddle-gpu==1.1.0.post85
+pip install paddlepaddle-gpu==1.3.0.post85

 # For installation on other platform, refer to http://paddlepaddle.org/
 ```
@ -76,30 +75,29 @@ pip install paddlepaddle-gpu==1.1.0.post85

 ## Installation

-It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html) on our website.
+It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) on our website.

 ## Documentation

-We provide [English](http://paddlepaddle.org/documentation/docs/en/1.1/getstarted/index_en.html) and
-[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.1/beginners_guide/index.html) documentation.
+We provide [English](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html) and
+[Chinese](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) documentation.

 - [Deep Learning 101](https://github.com/PaddlePaddle/book)

  You might want to start from this online interactive book that can run in a Jupyter Notebook.

- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/1.1/user_guides/howto/training/cluster_howto.html)
+- [Distributed Training](http://paddlepaddle.org/documentation/docs/en/1.3/user_guides/howto/training/multi_node_en.html)

  You can run distributed training jobs on MPI clusters.

- [Python API](http://paddlepaddle.org/documentation/api/zh/1.1/fluid.html)
+- [Python API](http://paddlepaddle.org/documentation/docs/en/1.3/api/index_en.html)

   Our new API enables much shorter programs.

- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/1.1/advanced_usage/development/contribute_to_paddle.html)
+- [How to Contribute](http://paddlepaddle.org/documentation/docs/en/1.3/advanced_usage/development/contribute_to_paddle/index_en.html)

   We appreciate your contributions!

-
 ## Ask Questions

 You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues).
--- a/README_cn.md
+++ b/README_cn.md
@ -0,0 +1,88 @@
+# PaddlePaddle
+
+[English](./README.md) | 简体中文
+
+[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+
+欢迎来到 PaddlePaddle GitHub
+
+PaddlePaddle (PArallel Distributed Deep LEarning) 是一个简单易用、高效灵活、可扩展的深度学习平台，最初由百度科学家和工程师共同开发，目的是将深度学习技术应用到百度的众多产品中。
+
+我们的愿景是让每个人都能通过PaddlePaddle接触深度学习
+
+跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
+
+### PaddlePaddle最新版本: [Fluid 1.3.0](https://github.com/PaddlePaddle/Paddle/tree/release/1.3)
+### 安装最新稳定版本:
+```
+# Linux CPU
+pip install paddlepaddle
+# Linux GPU cuda9cudnn7
+pip install paddlepaddle-gpu
+# Linux GPU cuda8cudnn7
+pip install paddlepaddle-gpu==1.3.0.post87
+# Linux GPU cuda8cudnn5
+pip install paddlepaddle-gpu==1.3.0.post85
+
+# 其他平台上的安装指引请参考 http://paddlepaddle.org/
+```
+
+## 特性
+
+- **灵活性**
+
+    PaddlePaddle支持丰富的神经网络架构和优化算法。易于配置复杂模型，例如带有注意力机制或复杂记忆连接的神经网络机器翻译模型。
+
+-  **高效性**
+
+    为了高效使用异步计算资源，PaddlePaddle对框架的不同层进行优化，包括计算、存储、架构和通信。下面是一些样例：
+
+    - 通过SSE/AVX 内置函数、BLAS库(例如MKL、OpenBLAS、cuBLAS)或定制的CPU/GPU内核优化数学操作。
+    - 通过MKL-DNN库优化CNN网络
+    - 高度优化循环网络，无需执行 `padding` 操作即可处理 **变长** 序列
+    - 针对高维稀疏数据模型，优化了局部和分布式训练。
+
+
+- **稳定性**
+
+    有了 PaddlePaddle，使得利用各种CPU/GPU和机器来加速训练变得简单。PaddlePaddle 通过优化通信可以实现巨大吞吐量和快速执行。
+
+- **与产品相连**
+
+    另外，PaddlePaddle 的设计也易于部署。在百度，PaddlePaddle 已经部署到含有巨大用户量的产品和服务上，包括广告点击率（CTR）预测、大规模图像分类、光学字符识别（OCR）、搜索排序，计算机病毒检测、推荐系统等等。PaddlePaddle广泛应用于百度产品中，产生了非常重要的影响。我们希望您也能探索 PaddlePaddle 的能力，为您的产品创造新的影响力和效果。
+
+## 安装
+
+推荐阅读官网上的[安装说明](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html)
+
+## 文档
+
+我们提供[英文](http://paddlepaddle.org/documentation/docs/en/1.3/beginners_guide/index_en.html)和
+[中文](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/index.html) 文档
+
+- [深度学习101](https://github.com/PaddlePaddle/book)
+
+  或许您想从这个在线交互式书籍开始，可以在Jupyter Notebook中运行
+
+- [分布式训练](http://paddlepaddle.org/documentation/docs/zh/1.3/user_guides/howto/training/multi_node.html)
+
+  可以在MPI集群上运行分布式训练任务
+
+- [Python API](http://paddlepaddle.org/documentation/docs/zh/1.3/api_cn/index_cn.html)
+
+   新的API支持代码更少更简洁的程序
+
+- [贡献方式](http://paddlepaddle.org/documentation/docs/zh/1.3/advanced_usage/development/contribute_to_paddle/index_cn.html)
+
+   欢迎您的贡献!
+
+## 答疑
+
+欢迎您将问题和bug报告以[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)的形式提交
+
+## 版权和许可证
+PaddlePaddle由[Apache-2.0 license](LICENSE)提供
--- a/benchmark/IntelOptimizedPaddle.md
+++ b/benchmark/IntelOptimizedPaddle.md
@ -1,112 +0,0 @@
-# Benchmark
-
-Machine:
-
- Server: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, 2 Sockets, 20 Cores per socket
- Laptop: TBD
-
-System: CentOS release 6.3 (Final), Docker 1.12.1.
-
-PaddlePaddle:
- paddlepaddle/paddle:0.11.0 (for MKLML and MKL-DNN)
-  - MKL-DNN tag v0.11
-  - MKLML 2018.0.1.20171007
- paddlepaddle/paddle:0.11.0-openblas (for OpenBLAS)
-  - OpenBLAS v0.2.20
-	 
-On each machine, we will test and compare the performance of training on single node using MKL-DNN / MKLML / OpenBLAS respectively.
-
-## Benchmark Model
-
-### Server
-
-#### Training
-Test on batch size 64, 128, 256 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
-Pay attetion that the speed below includes forward, backward and parameter update time. So we can not directly compare the data with the benchmark of caffe `time` [command](https://github.com/PaddlePaddle/Paddle/blob/develop/benchmark/caffe/image/run.sh#L9), which only contain forward and backward. The updating time of parameter would become very heavy when the weight size are large, especially on alexnet.
-
-Input image size - 3 * 224 * 224, Time: images/second
-
- VGG-19
-
-| BatchSize    | 64    | 128  | 256     |
-|--------------|-------| -----| --------|
-| OpenBLAS     | 7.80  | 9.00  | 10.80  | 
-| MKLML        | 12.12 | 13.70 | 16.18  |
-| MKL-DNN      | 28.46 | 29.83 | 30.44  |
-
-<img src="figs/vgg-cpu-train.png" width="500">
-
- - ResNet-50
-
-| BatchSize    | 64    | 128   | 256    |
-|--------------|-------| ------| -------|
-| OpenBLAS     | 25.22 | 25.68 | 27.12  | 
-| MKLML        | 32.52 | 31.89 | 33.12  |
-| MKL-DNN      | 81.69 | 82.35 | 84.08  |
-
-<img src="figs/resnet-cpu-train.png" width="500">
-
- - GoogLeNet
-
-| BatchSize    | 64    | 128   | 256    |
-|--------------|-------| ------| -------|
-| OpenBLAS     | 89.52 | 96.97 | 108.25 | 
-| MKLML        | 128.46| 137.89| 158.63 |
-| MKL-DNN      | 250.46| 264.83| 269.50 |
-
-<img src="figs/googlenet-cpu-train.png" width="500">
-
- AlexNet
-
-| BatchSize    | 64     | 128    | 256    |
-|--------------|--------| ------ | -------|
-| OpenBLAS     | 45.62  | 72.79  | 107.22 | 
-| MKLML        | 66.37  | 105.60 | 144.04 |
-| MKL-DNN      | 399.00 | 498.94 | 626.53 | 
-
-<img src="figs/alexnet-cpu-train.png" width="500">
-
-#### Inference
-Test on batch size 1, 2, 4, 8, 16 on Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
- VGG-19
-
-| BatchSize | 1     | 2     | 4     | 8     | 16    |
-|-----------|-------|-------|-------|-------|-------|
-| OpenBLAS  | 1.10  | 1.96  | 3.62  | 3.63  | 2.25  |
-| MKLML     | 5.58  | 9.80  | 15.15 | 21.21 | 28.67 |
-| MKL-DNN   | 75.07 | 88.64 | 82.58 | 92.29 | 96.75 |
-
-<img src="figs/vgg-cpu-infer.png" width="500">
-
- ResNet-50
-
-| BatchSize | 1     | 2      | 4      | 8      | 16     |
-|-----------|-------|--------|--------|--------|--------|
-| OpenBLAS  | 3.31  | 6.72   | 11.59  | 13.17  | 9.27   |
-| MKLML     | 6.33  | 12.02  | 22.88  | 40.53  | 63.09  |
-| MKL-DNN   | 107.83| 148.84 | 177.78 | 189.35 | 217.69 |
-
-<img src="figs/resnet-cpu-infer.png" width="500">
-
- GoogLeNet
-
-| BatchSize | 1      | 2      | 4      | 8      | 16     |
-|-----------|--------|--------|--------|--------|--------|
-| OpenBLAS  | 12.06  | 23.56  | 34.48  | 36.45  | 23.12  |
-| MKLML     | 22.74  | 41.56  | 81.22  | 133.47 | 210.53 |
-| MKL-DNN   | 175.10 | 272.92 | 450.70 | 512.00 | 600.94 |
-
-<img src="figs/googlenet-cpu-infer.png" width="500">
-
- AlexNet
-
-| BatchSize | 1      | 2      | 4      | 8      | 16     |
-|-----------|--------|--------|--------|--------|--------|
-| OpenBLAS  | 3.53   | 6.23   | 15.04  | 26.06  | 31.62  |
-| MKLML     | 21.32  | 36.55  | 73.06  | 131.15 | 192.77 |
-| MKL-DNN   | 442.91 | 656.41 | 719.10 | 847.68 | 850.51 |
-
-<img src="figs/alexnet-cpu-infer.png" width="500">
-
-### Laptop
-TBD
--- a/benchmark/README.md
+++ b/benchmark/README.md
@ -1,168 +0,0 @@
-# Benchmark
-
-Machine: 
-
- CPU: 12-core Intel(R) Xeon(R) CPU E5-2620 v2 @2.10GHz
- GPU: Tesla K40m
- cuDNN: v5.1
- system: Docker 1.12.1, all platforms are tested in docker environment.
-
-Platforms: 
-
- PaddlePaddle: paddledev/paddle:gpu-devel-v0.9.0a0 
- Tensorflow: gcr.io/tensorflow/tensorflow:0.11.0rc0-gpu 
- Caffe: kaixhin/cuda-caffe
-
-Several convolutional neural networks and recurrent neural networks are used to test.
-
-## Image
-
-### Benchmark Model
-
-AlexNet, GoogleNet and a small network used in Caffe.
-
- [AlexNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet): but the group size is one.
-
- [GoogleNet](https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet): but remove loss1 and loss2 when testing benchmark.
-
- [SmallNet](https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10\_quick\_train\_test.prototxt)
-
-
-### Single-GPU
-
- AlexNet:  input - 3 * 227 * 227,  Time: ms/batch
-
-| BatchSize    | 64  | 128  | 256   | 512  |
-|--------------|-----| -----| ------| -----|
-| PaddlePaddle | 195 | 334  | 602   | 1629 |
-| TensorFlow   | 223 | 364  | 645   | 1235 |
-| Caffe        | 324 | 627  | 1232  | 2513 |
- 
-**Notation**
-
-All platforms use cuDNN-v5.1. We see that caffe is slower in this experiment, because its workspace limit size of cuDNN-conv interface is 8 * 1024 * 1024, which is smaller in PaddlePaddle and TensorFlow. Note that Caffe will be faster if increasing the workspace limit size.
- 
- GoogletNet:  input - 3 * 224 * 224, Time: ms/batch
-
-
-| BatchSize    | 64    |   128  | 256     |
-|--------------|-------| -------| --------|
-| PaddlePaddle | 613   | 1149   | 2348    |
-| TensorFlow   | 644   | 1176   | 2219    |
-| Caffe        | 694   | 1364   | out of memory   |
-
- SmallNet: input - 3 * 32 * 32, Time ms/batch
-
-| BatchSize    | 64     |   128    | 256     | 512     |
-|--------------|--------| -------- | --------|---------|
-| PaddlePaddle | 10.463 | 18.184   | 33.113  |  63.039 |
-| TensorFlow   | 9     | 15       | 28      | 59       |
-| Caffe        | 9.373  | 16.6606  | 31.4797 | 59.719  |
-
-**Notation**
-
-All the single-GPU experiments in caffe use `caffe time` to calculate elapsed time, which does not include parameter updating time. However, both PaddlePaddle and TensorFlow experiments contain the parameter updating time. As compared with the total time, this part is relatively little on single machine, we can ignore it.
-
-In Tensorflow, they implement algorithm searching method instead of using the algorithm searching interface in cuDNN.
-
-### Multi-GPU: 4 GPUs
-
- AlexNet,  ms / batch
-
-| total-BatchSize | 128 * 4  | 256 * 4    |
-|------------------|----------| -----------|
-| PaddlePaddle     | 347      | 622        |
-| TensorFlow       | 377      | 675        |
-| Caffe            | 1229     | 2435       |
-
-For example, if `total-BatchSize = 128 * 4`, the speedup ratio is calculated by 
-
-```
-  time_at_1gpu_batch_128 * 4 / time_at_4gpu_total_batch_512 
-= (334 * 4)/347 
-= 3.85
-``` 
-
-<img src="figs/alexnet-4gpu.png" width="420">
-
-
- GoogleNet, ms / batch
-
-| total-BatchSize  | 128 * 4      |  256 * 4    |
-|-------------------|--------------| ----------- |
-| PaddlePaddle      | 1178         | 2367        |
-| TensorFlow        | 1210         | 2292        |
-| Caffe             | 2007         | out of memory  |
-
-<img src="figs/googlenet-4gpu.png" width="420">
-
-
-## RNN
-We use lstm network for text classfication to test benchmark.
-
-### Dataset
-  [IMDB](http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl)
- Sequence length is 100. In fact, PaddlePaddle supports training with variable-length sequence, but TensorFlow needs to pad. Thus, we also pad sequence length to 100 in PaddlePaddle in order to compare.
- Dictionary size=30000 
- Peephole connection is used in `lstmemory` by default in PaddlePaddle. It is also configured in TensorFlow.
-
-### Single-GPU
-
-#### LSTM in Text Classification
-
-Testing `2 lstm layer + fc` network with different hidden size and batch size.
-  
- Batch size = 64, ms / batch
- 
-| hidden_size  | 256   | 512    |  1280   |
-|--------------|-------| -------| --------|
-| PaddlePaddle | 83    | 184    | 641     |
-| TensorFlow   | 175   | 280    | 818     |
-
- Batch size = 128, ms / batch
- 
-| hidden_size  | 256    | 512    |  1280   |
-|--------------|------- | -------| --------|
-| PaddlePaddle | 110    | 261    | 1007    |
-| TensorFlow   | 181    | 361    | 1237    |
-
-
- Batch size = 256, ms / batch
- 
-| hidden_size  | 256   | 512    |  1280   |
-|--------------|-------| -------| --------|
-| PaddlePaddle | 170   | 414    | 1655    |
-| TensorFlow   | 238   | 536    | 1905    |
-
-<img src="figs/rnn_lstm_cls.png" width="600">
-
-#### Seq2Seq
-
-The benchmark of sequence-to-sequence network will be added later.
- 
-
-### Multi GPU: 4 GPUs
-
-#### LSTM in Text Classification
-
- hidden_size = 256, ms / batch
- 
-| batch_size   | 256    |  512    |
-|--------------| -------| --------|
-| PaddlePaddle | 90     | 118     |
-| TensorFlow   | 226    | 118     |
-
-
- hidden_size = 512, ms / batch
- 
-| batch_size   | 256    |  512    |
-|--------------| -------| --------|
-| PaddlePaddle | 189    | 268     |
-| TensorFlow   | 297    | 383     |
-
-
-<img src="figs/rnn_lstm_4gpus.png" width="420">
-
-#### Seq2Seq
-
-The benchmark of sequence-to-sequence network will be added later.
--- a/benchmark/fluid/Dockerfile
+++ b/benchmark/fluid/Dockerfile
@ -15,9 +15,6 @@ RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s
 RUN pip install -U pip
 RUN pip install -U kubernetes paddlepaddle

-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python'
-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python'
-RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.imikolov.fetch()" | python'
 RUN pip uninstall -y paddlepaddle && mkdir /workspace

 ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
--- a/benchmark/paddle/image/check_env.sh
+++ b/benchmark/paddle/image/check_env.sh
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@ -81,9 +81,11 @@ def dist_transpile(trainer_id, args, train_prog, startup_prog):
    # the role, should be either PSERVER or TRAINER
    training_role = os.getenv("PADDLE_TRAINING_ROLE")

-    config = distribute_transpiler.DistributeTranspilerConfig()
+    config = fluid.DistributeTranspilerConfig()
    config.slice_var_up = not args.no_split_var
+    config.min_block_size = 1048576
    t = distribute_transpiler.DistributeTranspiler(config=config)
+
    t.transpile(
        trainer_id,
        # NOTE: *MUST* use train_prog, for we are using with guard to
@ -177,7 +179,6 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
    else:
        build_strategy.reduce_strategy = fluid.BuildStrategy(
        ).ReduceStrategy.AllReduce
-    build_strategy.fuse_broadcast_op = args.fuse_broadcast_op

    avg_loss = train_args[0]

--- a/benchmark/paddle/image/alexnet.py
+++ b/benchmark/paddle/image/alexnet.py
@ -1,93 +0,0 @@
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-height = 227
-width = 227
-num_class = 1000
-batch_size = get_config_arg('batch_size', int, 128)
-gp = get_config_arg('layer_num', int, 1)
-is_infer = get_config_arg("is_infer", bool, False)
-num_samples = get_config_arg('num_samples', int, 2560)
-
-args = {
-    'height': height,
-    'width': width,
-    'color': True,
-    'num_class': num_class,
-    'is_infer': is_infer,
-    'num_samples': num_samples
-}
-define_py_data_sources2(
-    "train.list" if not is_infer else None,
-    "test.list" if is_infer else None,
-    module="provider",
-    obj="process",
-    args=args)
-
-settings(
-    batch_size=batch_size,
-    learning_rate=0.01 / batch_size,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * batch_size))
-
-# conv1
-net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(
-    input=net,
-    filter_size=11,
-    num_channels=3,
-    num_filters=96,
-    stride=4,
-    padding=1)
-net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
-net = img_pool_layer(input=net, pool_size=3, stride=2)
-
-# conv2
-net = img_conv_layer(
-    input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=gp)
-net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
-net = img_pool_layer(input=net, pool_size=3, stride=2)
-
-# conv3
-net = img_conv_layer(
-    input=net, filter_size=3, num_filters=384, stride=1, padding=1)
-# conv4
-net = img_conv_layer(
-    input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=gp)
-
-# conv5
-net = img_conv_layer(
-    input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=gp)
-net = img_pool_layer(input=net, pool_size=3, stride=2)
-
-net = fc_layer(
-    input=net,
-    size=4096,
-    act=ReluActivation(),
-    layer_attr=ExtraAttr(drop_rate=0.5))
-net = fc_layer(
-    input=net,
-    size=4096,
-    act=ReluActivation(),
-    layer_attr=ExtraAttr(drop_rate=0.5))
-net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
-
-if is_infer:
-    outputs(net)
-else:
-    lab = data_layer('label', num_class)
-    loss = cross_entropy(input=net, label=lab)
-    outputs(loss)
--- a/benchmark/paddle/image/googlenet.py
+++ b/benchmark/paddle/image/googlenet.py
@ -1,245 +0,0 @@
-#!/usr/bin/env python
-from paddle.trainer_config_helpers import *
-
-height = 224
-width = 224
-num_class = 1000
-batch_size = get_config_arg('batch_size', int, 128)
-use_gpu = get_config_arg('use_gpu', bool, True)
-is_infer = get_config_arg("is_infer", bool, False)
-num_samples = get_config_arg('num_samples', int, 2560)
-
-args = {
-    'height': height,
-    'width': width,
-    'color': True,
-    'num_class': num_class,
-    'is_infer': is_infer,
-    'num_samples': num_samples
-}
-define_py_data_sources2(
-    "train.list" if not is_infer else None,
-    "test.list" if is_infer else None,
-    module="provider",
-    obj="process",
-    args=args)
-
-settings(
-    batch_size=batch_size,
-    learning_rate=0.01 / batch_size,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * batch_size))
-
-conv_projection = conv_projection if use_gpu else img_conv_layer
-
-def inception2(name, input, channels, \
-    filter1,
-    filter3R, filter3,
-    filter5R, filter5,
-    proj):
-
-    conv1 = name + '_1'
-    conv3r = name + '_3r'
-    conv3 = name + '_3'
-    conv5r = name + '_5r'
-    conv5 = name + '_5'
-    maxpool = name + '_max'
-    convproj = name + '_proj'
-
-    cov1 = img_conv_layer(
-        name=conv1,
-        input=input,
-        filter_size=1,
-        num_channels=channels,
-        num_filters=filter1,
-        stride=1,
-        padding=0)
-
-    cov3r = img_conv_layer(
-        name=conv3r,
-        input=input,
-        filter_size=1,
-        num_channels=channels,
-        num_filters=filter3R,
-        stride=1,
-        padding=0)
-    cov3 = img_conv_layer(
-        name=conv3,
-        input=cov3r,
-        filter_size=3,
-        num_filters=filter3,
-        stride=1,
-        padding=1)
-
-    cov5r = img_conv_layer(
-        name=conv5r,
-        input=input,
-        filter_size=1,
-        num_channels=channels,
-        num_filters=filter5R,
-        stride=1,
-        padding=0)
-    cov5 = img_conv_layer(
-        name=conv5,
-        input=cov5r,
-        filter_size=5,
-        num_filters=filter5,
-        stride=1,
-        padding=2)
-
-    pool1 = img_pool_layer(
-        name=maxpool,
-        input=input,
-        pool_size=3,
-        num_channels=channels,
-        stride=1,
-        padding=1)
-    covprj = img_conv_layer(
-        name=convproj,
-        input=pool1,
-        filter_size=1,
-        num_filters=proj,
-        stride=1,
-        padding=0)
-
-    cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj])
-    return cat
-
-def inception(name, input, channels, \
-    filter1,
-    filter3R, filter3,
-    filter5R, filter5,
-    proj):
-
-    cov1 = conv_projection(
-        input=input,
-        filter_size=1,
-        num_channels=channels,
-        num_filters=filter1,
-        stride=1,
-        padding=0)
-
-    cov3r = img_conv_layer(
-        name=name + '_3r',
-        input=input,
-        filter_size=1,
-        num_channels=channels,
-        num_filters=filter3R,
-        stride=1,
-        padding=0)
-    cov3 = conv_projection(
-        input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1)
-
-    cov5r = img_conv_layer(
-        name=name + '_5r',
-        input=input,
-        filter_size=1,
-        num_channels=channels,
-        num_filters=filter5R,
-        stride=1,
-        padding=0)
-    cov5 = conv_projection(
-        input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2)
-
-    pool1 = img_pool_layer(
-        name=name + '_max',
-        input=input,
-        pool_size=3,
-        num_channels=channels,
-        stride=1,
-        padding=1)
-    covprj = conv_projection(
-        input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0)
-
-    cat = concat_layer(
-        name=name,
-        input=[cov1, cov3, cov5, covprj],
-        bias_attr=True if use_gpu else False,
-        act=ReluActivation())
-    return cat
-
-
-data = data_layer(name="input", size=3 * height * width)
-
-# stage 1
-conv1 = img_conv_layer(
-    name="conv1",
-    input=data,
-    filter_size=7,
-    num_channels=3,
-    num_filters=64,
-    stride=2,
-    padding=3)
-pool1 = img_pool_layer(
-    name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2)
-
-# stage 2
-conv2_1 = img_conv_layer(
-    name="conv2_1",
-    input=pool1,
-    filter_size=1,
-    num_filters=64,
-    stride=1,
-    padding=0)
-conv2_2 = img_conv_layer(
-    name="conv2_2",
-    input=conv2_1,
-    filter_size=3,
-    num_filters=192,
-    stride=1,
-    padding=1)
-pool2 = img_pool_layer(
-    name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2)
-
-# stage 3
-ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32)
-ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64)
-pool3 = img_pool_layer(
-    name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2)
-
-# stage 4
-ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64)
-ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64)
-ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64)
-ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64)
-ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128)
-pool4 = img_pool_layer(
-    name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2)
-
-# stage 5
-ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128)
-ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128)
-pool5 = img_pool_layer(
-    name="pool5",
-    input=ince5b,
-    num_channels=1024,
-    pool_size=7,
-    stride=7,
-    pool_type=AvgPooling())
-
-# We remove loss1 and loss2 for all system when testing benchmark
-# output 1
-# pool_o1 = img_pool_layer(name="pool_o1", input=ince4a, num_channels=512, pool_size=5, stride=3, pool_type=AvgPooling())
-# conv_o1 = img_conv_layer(name="conv_o1", input=pool_o1, filter_size=1, num_filters=128, stride=1, padding=0)
-# fc_o1 = fc_layer(name="fc_o1", input=conv_o1, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
-# out1 = fc_layer(name="output1", input=fc_o1,  size=1000, act=SoftmaxActivation())
-# loss1 = cross_entropy(name='loss1', input=out1, label=lab, coeff=0.3) 
-
-# output 2
-#pool_o2 = img_pool_layer(name="pool_o2", input=ince4d, num_channels=528, pool_size=5, stride=3, pool_type=AvgPooling())
-#conv_o2 = img_conv_layer(name="conv_o2", input=pool_o2, filter_size=1, num_filters=128, stride=1, padding=0)
-#fc_o2 = fc_layer(name="fc_o2", input=conv_o2, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
-#out2 = fc_layer(name="output2", input=fc_o2, size=1000, act=SoftmaxActivation())
-#loss2 = cross_entropy(name='loss2', input=out2, label=lab, coeff=0.3) 
-
-# output 3
-dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
-out3 = fc_layer(
-    name="output3", input=dropout, size=1000, act=SoftmaxActivation())
-
-if is_infer:
-    outputs(out3)
-else:
-    lab = data_layer(name="label", size=num_class)
-    loss3 = cross_entropy(name='loss3', input=out3, label=lab)
-    outputs(loss3)
--- a/benchmark/paddle/image/plotlog.py
+++ b/benchmark/paddle/image/plotlog.py
@ -1,114 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import argparse
-import matplotlib.pyplot as plt
-
-
-def parse_args():
-    parser = argparse.ArgumentParser('Parse Log')
-    parser.add_argument(
-        '--file_path', '-f', type=str, help='the path of the log file')
-    parser.add_argument(
-        '--sample_rate',
-        '-s',
-        type=float,
-        default=1.0,
-        help='the rate to take samples from log')
-    parser.add_argument(
-        '--log_period', '-p', type=int, default=1, help='the period of log')
-
-    args = parser.parse_args()
-    return args
-
-
-def parse_file(file_name):
-    loss = []
-    error = []
-    with open(file_name) as f:
-        for i, line in enumerate(f):
-            line = line.strip()
-            if not line.startswith('pass'):
-                continue
-            line_split = line.split(' ')
-            if len(line_split) != 5:
-                continue
-
-            loss_str = line_split[2][:-1]
-            cur_loss = float(loss_str.split('=')[-1])
-            loss.append(cur_loss)
-
-            err_str = line_split[3][:-1]
-            cur_err = float(err_str.split('=')[-1])
-            error.append(cur_err)
-
-    accuracy = [1.0 - err for err in error]
-
-    return loss, accuracy
-
-
-def sample(metric, sample_rate):
-    interval = int(1.0 / sample_rate)
-    if interval > len(metric):
-        return metric[:1]
-
-    num = len(metric) / interval
-    idx = [interval * i for i in range(num)]
-    metric_sample = [metric[id] for id in idx]
-    return metric_sample
-
-
-def plot_metric(metric,
-                batch_id,
-                graph_title,
-                line_style='b-',
-                line_label='y',
-                line_num=1):
-    plt.figure()
-    plt.title(graph_title)
-    if line_num == 1:
-        plt.plot(batch_id, metric, line_style, label=line_label)
-    else:
-        for i in range(line_num):
-            plt.plot(batch_id, metric[i], line_style[i], label=line_label[i])
-    plt.xlabel('batch')
-    plt.ylabel(graph_title)
-    plt.legend()
-    plt.savefig(graph_title + '.jpg')
-    plt.close()
-
-
-def main():
-    args = parse_args()
-    assert args.sample_rate > 0. and args.sample_rate <= 1.0, "The sample rate should in the range (0, 1]."
-
-    loss, accuracy = parse_file(args.file_path)
-    batch = [args.log_period * i for i in range(len(loss))]
-
-    batch_sample = sample(batch, args.sample_rate)
-    loss_sample = sample(loss, args.sample_rate)
-    accuracy_sample = sample(accuracy, args.sample_rate)
-
-    plot_metric(loss_sample, batch_sample, 'loss', line_label='loss')
-    plot_metric(
-        accuracy_sample,
-        batch_sample,
-        'accuracy',
-        line_style='g-',
-        line_label='accuracy')
-
-
-if __name__ == '__main__':
-    main()
--- a/benchmark/paddle/image/provider.py
+++ b/benchmark/paddle/image/provider.py
@ -1,47 +0,0 @@
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import io, os
-import random
-import numpy as np
-from paddle.trainer.PyDataProvider2 import *
-
-
-def initHook(settings, height, width, color, num_class, **kwargs):
-    settings.height = height
-    settings.width = width
-    settings.color = color
-    settings.num_class = num_class
-    if settings.color:
-        settings.data_size = settings.height * settings.width * 3
-    else:
-        settings.data_size = settings.height * settings.width
-    settings.is_infer = kwargs.get('is_infer', False)
-    settings.num_samples = kwargs.get('num_samples', 2560)
-    if settings.is_infer:
-        settings.slots = [dense_vector(settings.data_size)]
-    else:
-        settings.slots = [dense_vector(settings.data_size), integer_value(1)]
-
-
-@provider(
-    init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_list):
-    for i in xrange(settings.num_samples):
-        img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
-        if settings.is_infer:
-            yield img.astype('float32')
-        else:
-            lab = random.randint(0, settings.num_class - 1)
-            yield img.astype('float32'), int(lab)
--- a/benchmark/paddle/image/resnet.py
+++ b/benchmark/paddle/image/resnet.py
@ -1,230 +0,0 @@
-#!/usr/bin/env python
-from paddle.trainer_config_helpers import *
-
-height = 224
-width = 224
-num_class = 1000
-batch_size = get_config_arg('batch_size', int, 64)
-layer_num = get_config_arg("layer_num", int, 50)
-is_infer = get_config_arg("is_infer", bool, False)
-num_samples = get_config_arg('num_samples', int, 2560)
-
-args = {
-    'height': height,
-    'width': width,
-    'color': True,
-    'num_class': num_class,
-    'is_infer': is_infer,
-    'num_samples': num_samples
-}
-define_py_data_sources2(
-    "train.list" if not is_infer else None,
-    "test.list" if is_infer else None,
-    module="provider",
-    obj="process",
-    args=args)
-
-settings(
-    batch_size=batch_size,
-    learning_rate=0.01 / batch_size,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * batch_size))
-
-
-#######################Network Configuration #############
-def conv_bn_layer(name,
-                  input,
-                  filter_size,
-                  num_filters,
-                  stride,
-                  padding,
-                  channels=None,
-                  active_type=ReluActivation()):
-    """
-    A wrapper for conv layer with batch normalization layers.
-    Note:
-    conv layer has no activation.
-    """
-
-    tmp = img_conv_layer(
-        name=name + "_conv",
-        input=input,
-        filter_size=filter_size,
-        num_channels=channels,
-        num_filters=num_filters,
-        stride=stride,
-        padding=padding,
-        act=LinearActivation(),
-        bias_attr=False)
-    return batch_norm_layer(
-        name=name + "_bn",
-        input=tmp,
-        act=active_type,
-        use_global_stats=is_infer)
-
-
-def bottleneck_block(name, input, num_filters1, num_filters2):
-    """
-    A wrapper for bottlenect building block in ResNet.
-    Last conv_bn_layer has no activation.
-    Addto layer has activation of relu.
-    """
-    last_name = conv_bn_layer(
-        name=name + '_branch2a',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters1,
-        stride=1,
-        padding=0)
-    last_name = conv_bn_layer(
-        name=name + '_branch2b',
-        input=last_name,
-        filter_size=3,
-        num_filters=num_filters1,
-        stride=1,
-        padding=1)
-    last_name = conv_bn_layer(
-        name=name + '_branch2c',
-        input=last_name,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=1,
-        padding=0,
-        active_type=LinearActivation())
-
-    return addto_layer(
-        name=name + "_addto", input=[input, last_name], act=ReluActivation())
-
-
-def mid_projection(name, input, num_filters1, num_filters2, stride=2):
-    """
-    A wrapper for middile projection in ResNet.
-    projection shortcuts are used for increasing dimensions,
-    and other shortcuts are identity
-    branch1: projection shortcuts are used for increasing
-    dimensions, has no activation.
-    branch2x: bottleneck building block, shortcuts are identity.
-    """
-    # stride = 2
-    branch1 = conv_bn_layer(
-        name=name + '_branch1',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=stride,
-        padding=0,
-        active_type=LinearActivation())
-
-    last_name = conv_bn_layer(
-        name=name + '_branch2a',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters1,
-        stride=stride,
-        padding=0)
-    last_name = conv_bn_layer(
-        name=name + '_branch2b',
-        input=last_name,
-        filter_size=3,
-        num_filters=num_filters1,
-        stride=1,
-        padding=1)
-
-    last_name = conv_bn_layer(
-        name=name + '_branch2c',
-        input=last_name,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=1,
-        padding=0,
-        active_type=LinearActivation())
-
-    return addto_layer(
-        name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
-
-
-img = data_layer(name='image', size=height * width * 3)
-
-
-def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
-    """
-    A wrapper for 50,101,152 layers of ResNet.
-    res2_num: number of blocks stacked in conv2_x
-    res3_num: number of blocks stacked in conv3_x
-    res4_num: number of blocks stacked in conv4_x
-    res5_num: number of blocks stacked in conv5_x
-    """
-    # For ImageNet
-    # conv1: 112x112
-    tmp = conv_bn_layer(
-        "conv1",
-        input=img,
-        filter_size=7,
-        channels=3,
-        num_filters=64,
-        stride=2,
-        padding=3)
-    tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
-
-    # conv2_x: 56x56
-    tmp = mid_projection(
-        name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
-    for i in xrange(2, res2_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
-
-    # conv3_x: 28x28
-    tmp = mid_projection(
-        name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
-    for i in xrange(2, res3_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res3_" + str(i),
-            input=tmp,
-            num_filters1=128,
-            num_filters2=512)
-
-    # conv4_x: 14x14
-    tmp = mid_projection(
-        name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
-    for i in xrange(2, res4_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res4_" + str(i),
-            input=tmp,
-            num_filters1=256,
-            num_filters2=1024)
-
-    # conv5_x: 7x7
-    tmp = mid_projection(
-        name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
-    for i in xrange(2, res5_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res5_" + str(i),
-            input=tmp,
-            num_filters1=512,
-            num_filters2=2048)
-
-    tmp = img_pool_layer(
-        name='avgpool',
-        input=tmp,
-        pool_size=7,
-        stride=1,
-        pool_type=AvgPooling())
-
-    return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation())
-
-
-if layer_num == 50:
-    resnet = deep_res_net(3, 4, 6, 3)
-elif layer_num == 101:
-    resnet = deep_res_net(3, 4, 23, 3)
-elif layer_num == 152:
-    resnet = deep_res_net(3, 8, 36, 3)
-else:
-    print("Wrong layer number.")
-
-if is_infer:
-    outputs(resnet)
-else:
-    lbl = data_layer(name="label", size=num_class)
-    loss = cross_entropy(name='loss', input=resnet, label=lbl)
-    outputs(loss)
--- a/benchmark/paddle/image/run.sh
+++ b/benchmark/paddle/image/run.sh
@ -1,53 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function train() {
-  cfg=$1
-  thread=$2
-  bz=$3
-  args="batch_size=$3"
-  prefix=$4
-  paddle train --job=time \
-    --config=$cfg \
-    --use_gpu=True \
-    --trainer_count=$thread \
-    --log_period=10 \
-    --test_period=100 \
-    --config_args=$args \
-    > logs/$prefix-${thread}gpu-$bz.log 2>&1 
-}
-
-if [ ! -d "train.list" ]; then
-  echo " " > train.list
-fi
-if [ ! -d "logs" ]; then
-  mkdir logs
-fi
-
-#========single-gpu=========#
-# alexnet
-train alexnet.py 1 64 alexnet
-train alexnet.py 1 128 alexnet
-train alexnet.py 1 256 alexnet
-train alexnet.py 1 512 alexnet
-
-# googlenet
-train googlenet.py 1 64 googlenet
-train googlenet.py 1 128 googlenet
-train googlenet.py 1 256 googlenet
-
-# smallnet
-train smallnet_mnist_cifar.py 1 64 smallnet
-train smallnet_mnist_cifar.py 1 128 smallnet
-train smallnet_mnist_cifar.py 1 256 smallnet
-train smallnet_mnist_cifar.py 1 512 smallnet
-
-
-############################
-#========multi-gpus=========#
-train alexnet.py 4 512 alexnet
-train alexnet.py 4 1024 alexnet
-
-train googlenet.py 4 512 googlenet 
-train googlenet.py 4 1024 googlenet
--- a/benchmark/paddle/image/run_mkl_infer.sh
+++ b/benchmark/paddle/image/run_mkl_infer.sh
@ -1,89 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function clock_to_seconds() {
-  hours=`echo $1 | awk -F ':' '{print $1}'`
-  mins=`echo $1 | awk -F ':' '{print $2}'`
-  secs=`echo $1 | awk -F ':' '{print $3}'`
-  echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'`
-}
-
-function infer() {
-  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
-  topology=$1
-  layer_num=$2
-  bs=$3
-  use_mkldnn=$4
-  if [ $4 == "True" ]; then
-    thread=1
-    log="logs/infer-${topology}-${layer_num}-mkldnn-${bs}.log"
-  elif [ $4 == "False" ]; then
-    thread=`nproc`
-    if [ $thread -gt $bs ]; then
-      thread=$bs
-    fi
-    log="logs/infer-${topology}-${layer_num}-${thread}mklml-${bs}.log"
-  else
-    echo "Wrong input $4, use True or False."
-    exit 0
-  fi
-
-  models_in="models/${topology}-${layer_num}/pass-00000/"
-  if [ ! -d $models_in ]; then
-    echo "Training model ${topology}_${layer_num}"
-    paddle train --job=train \
-      --config="${topology}.py" \
-      --use_mkldnn=True \
-      --use_gpu=False \
-      --trainer_count=1 \
-      --num_passes=1 \
-      --save_dir="models/${topology}-${layer_num}" \
-      --config_args="batch_size=128,layer_num=${layer_num},num_samples=256" \
-      > /dev/null 2>&1
-    echo "Done"
-  fi
-  log_period=$((256 / bs))
-  paddle train --job=test \
-    --config="${topology}.py" \
-    --use_mkldnn=$use_mkldnn \
-    --use_gpu=False \
-    --trainer_count=$thread \
-    --log_period=$log_period \
-    --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \
-    --init_model_path=$models_in \
-    2>&1 | tee ${log}
-
-  # calculate the last 5 logs period time of 1280 samples,
-  # the time before are burning time.
-  start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
-  end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
-  start_sec=`clock_to_seconds $start`
-  end_sec=`clock_to_seconds $end`
-  fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'`
-  echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
-  echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
-  echo " " > train.list
-fi
-if [ ! -f "test.list" ]; then
-  echo " " > test.list
-fi
-if [ ! -d "logs" ]; then
-  mkdir logs
-fi
-if [ ! -d "models" ]; then
-  mkdir -p models
-fi
-
-# inference benchmark
-for use_mkldnn in True False; do
-  for batchsize in 1 2 4 8 16; do
-    infer vgg 19 $batchsize $use_mkldnn
-    infer resnet 50 $batchsize $use_mkldnn
-    infer googlenet v1 $batchsize $use_mkldnn
-    infer alexnet 2 $batchsize $use_mkldnn
-  done
-done
--- a/benchmark/paddle/image/run_mkl_train.sh
+++ b/benchmark/paddle/image/run_mkl_train.sh
@ -1,54 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function train() {
-  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
-  topology=$1
-  layer_num=$2
-  bs=$3
-  use_mkldnn=$4
-  if [ $4 == "True" ]; then
-    thread=1
-    log="logs/train-${topology}-${layer_num}-mkldnn-${bs}.log"
-  elif [ $4 == "False" ]; then
-    thread=`nproc`
-    # each trainer_count use only 1 core to avoid conflict
-    log="logs/train-${topology}-${layer_num}-${thread}mklml-${bs}.log"
-  else
-    echo "Wrong input $4, use True or False."
-    exit 0
-  fi
-  args="batch_size=${bs},layer_num=${layer_num}"
-  config="${topology}.py"
-  paddle train --job=time \
-    --config=$config \
-    --use_mkldnn=$use_mkldnn \
-    --use_gpu=False \
-    --trainer_count=$thread \
-    --log_period=10 \
-    --test_period=100 \
-    --config_args=$args \
-    2>&1 | tee ${log} 
-
-  avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'`
-  fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'`
-  echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
-  echo " " > train.list
-fi
-if [ ! -d "logs" ]; then
-  mkdir logs
-fi
-
-# training benchmark
-for use_mkldnn in True False; do
-  for batchsize in 64 128 256; do
-    train vgg 19 $batchsize $use_mkldnn
-    train resnet 50 $batchsize $use_mkldnn
-    train googlenet v1 $batchsize $use_mkldnn
-    train alexnet 2 $batchsize $use_mkldnn
-  done
-done
--- a/benchmark/paddle/image/run_openblas_infer.sh
+++ b/benchmark/paddle/image/run_openblas_infer.sh
@ -1,71 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function clock_to_seconds() {
-  hours=`echo $1 | awk -F ':' '{print $1}'`
-  mins=`echo $1 | awk -F ':' '{print $2}'`
-  secs=`echo $1 | awk -F ':' '{print $3}'`
-  echo `awk 'BEGIN{printf "%.2f",('$secs' + '$mins' * 60 + '$hours' * 3600)}'`
-}
-
-function infer() {
-  export OPENBLAS_MAIN_FREE=1
-  topology=$1
-  layer_num=$2
-  bs=$3
-  trainers=`nproc`
-  if [ $trainers -gt $bs ]; then
-    trainers=$bs
-  fi
-  log="logs/infer-${topology}-${layer_num}-${trainers}openblas-${bs}.log"
-  threads=$((`nproc` / trainers))
-  if [ $threads -eq 0 ]; then
-    threads=1
-  fi
-  export OPENBLAS_NUM_THREADS=$threads
-
-  models_in="models/${topology}-${layer_num}/pass-00000/"
-  if [ ! -d $models_in ]; then
-    echo "./run_mkl_infer.sh to save the model first"
-    exit 0
-  fi
-  log_period=$((32 / bs))
-  paddle train --job=test \
-    --config="${topology}.py" \
-    --use_mkldnn=False \
-    --use_gpu=False \
-    --trainer_count=$trainers \
-    --log_period=$log_period \
-    --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True,num_samples=256" \
-    --init_model_path=$models_in \
-    2>&1 | tee ${log}
-
-  # calculate the last 5 logs period time of 160(=32*5) samples,
-  # the time before are burning time.
-  start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
-  end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
-  start_sec=`clock_to_seconds $start`
-  end_sec=`clock_to_seconds $end`
-  fps=`awk 'BEGIN{printf "%.2f",(160 / ('$end_sec' - '$start_sec'))}'`
-  echo "Last 160 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
-  echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
-  echo " " > train.list
-fi
-if [ ! -f "test.list" ]; then
-  echo " " > test.list
-fi
-if [ ! -d "logs" ]; then
-  mkdir logs
-fi
-
-# inference benchmark
-for batchsize in 1 2 4 8 16; do
-  infer vgg 19 $batchsize
-  infer resnet 50 $batchsize 
-  infer googlenet v1 $batchsize
-  infer alexnet 2 $batchsize
-done
--- a/benchmark/paddle/image/run_openblas_train.sh
+++ b/benchmark/paddle/image/run_openblas_train.sh
@ -1,43 +0,0 @@
-#!/bin/bash
-
-set -e
-
-function train() {
-  export OPENBLAS_NUM_THREADS=1
-  topology=$1
-  layer_num=$2
-  bs=$3
-  thread=`nproc`
-  # each trainer_count use only 1 core to avoid conflict
-  log="logs/train-${topology}-${layer_num}-${thread}openblas-${bs}.log"
-  args="batch_size=${bs},layer_num=${layer_num}"
-  config="${topology}.py"
-  paddle train --job=time \
-    --config=$config \
-    --use_mkldnn=False \
-    --use_gpu=False \
-    --trainer_count=$thread \
-    --log_period=3 \
-    --test_period=30 \
-    --config_args=$args \
-    2>&1 | tee ${log} 
-
-  avg_time=`tail ${log} -n 1 | awk -F ' ' '{print $8}' | sed 's/avg=//'`
-  fps=`awk 'BEGIN{printf "%.2f",('$bs' / '$avg_time' * 1000)}'`
-  echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
-}
-
-if [ ! -f "train.list" ]; then
-  echo " " > train.list
-fi
-if [ ! -d "logs" ]; then
-  mkdir logs
-fi
-
-# training benchmark
-for batchsize in 64 128 256; do
-  train vgg 19 $batchsize
-  train resnet 50 $batchsize
-  train googlenet v1 $batchsize
-  train alexnet 2 $batchsize
-done
--- a/benchmark/paddle/image/smallnet_mnist_cifar.py
+++ b/benchmark/paddle/image/smallnet_mnist_cifar.py
@ -1,49 +0,0 @@
-#!/usr/bin/env python
-
-from paddle.trainer_config_helpers import *
-
-height = 32
-width = 32
-num_class = 10
-
-batch_size = get_config_arg('batch_size', int, 128)
-
-args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
-define_py_data_sources2(
-    "train.list", None, module="provider", obj="process", args=args)
-
-settings(
-    batch_size=batch_size,
-    learning_rate=0.01 / batch_size,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * batch_size))
-
-# conv1
-net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(
-    input=net,
-    filter_size=5,
-    num_channels=3,
-    num_filters=32,
-    stride=1,
-    padding=2)
-net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1)
-
-# conv2
-net = img_conv_layer(
-    input=net, filter_size=5, num_filters=32, stride=1, padding=2)
-net = img_pool_layer(
-    input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
-
-# conv3
-net = img_conv_layer(
-    input=net, filter_size=3, num_filters=64, stride=1, padding=1)
-net = img_pool_layer(
-    input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
-
-net = fc_layer(input=net, size=64, act=ReluActivation())
-net = fc_layer(input=net, size=10, act=SoftmaxActivation())
-
-lab = data_layer('label', num_class)
-loss = classification_cost(input=net, label=lab)
-outputs(loss)
--- a/Show More
+++ b/Show More