Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_docker_build

7 years ago · ee65272933
parent a58d228359 4fa3cee549
commit ee65272933
2484 changed files with 108342 additions and 24721 deletions
--- a/.clang-format
+++ b/.clang-format
@ -19,7 +19,7 @@ BasedOnStyle:  Google
 IndentWidth:     2
 TabWidth:        2
 ContinuationIndentWidth: 4
-AccessModifierOffset: -2  # The private/protected/public has no indent in class
+AccessModifierOffset: -1  # The private/protected/public has no indent in class
 Standard:  Cpp11 
 AllowAllParametersOfDeclarationOnNextLine: true
 BinPackParameters: false
--- a/.gitignore
+++ b/.gitignore
@ -5,6 +5,7 @@ python/paddle/v2/fluid/tests/book/image_classification_resnet.inference.model/
 python/paddle/v2/fluid/tests/book/image_classification_vgg.inference.model/
 python/paddle/v2/fluid/tests/book/label_semantic_roles.inference.model/
 *.DS_Store
 *.vs
 build/
 build_doc/
 *.user
@ -15,6 +16,7 @@ build_doc/
 .cproject
 .pydevproject
 .settings/
 CMakeSettings.json
 Makefile
 .test_env/
 third_party/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -23,7 +23,7 @@ repos:
    -   id: clang-format-with-version-check
        name: clang-format
        description: Format files with ClangFormat.
-        entry: bash ./.clang_format.hook -i
+        entry: bash ./tools/codestyle/clang_format.hook -i
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
 -   repo: local
@ -34,6 +34,14 @@ repos:
        entry: bash ./tools/codestyle/cpplint_pre_commit.hook
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
 -   repo: local
    hooks:
    -   id: pylint-doc-string
        name: pylint
        description: Check python docstring style using docstring_checker.
        entry: bash ./tools/codestyle/pylint_pre_commit.hook
        language: system
        files: \.(py)$
 -   repo: https://github.com/PaddlePaddle/pre-commit-golang
    sha: 8337620115c25ff8333f1b1a493bd031049bd7c0
    hooks:
@ -44,7 +52,7 @@ repos:
    hooks:
    -   id: copyright_checker
        name: copyright_checker
-        entry: python ./.copyright.hook
+        entry: python ./tools/codestyle/copyright.hook
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
--- a/.travis.yml
+++ b/.travis.yml
@ -18,6 +18,8 @@ env:
 addons:
  ssh_known_hosts: 13.229.163.131
 before_install:
  # For pylint docstring checker
  - sudo pip install pylint pytest astroid isort
  - |
    function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
 script:
@ -25,15 +27,6 @@ script:
    # 43min timeout
    paddle/scripts/paddle_docker_build.sh ${JOB}
    # Capture the build's exit status once. Re-reading "$?" inside the second
    # "[ ... ]" would return the status of the first test, not of the build, so
    # the 142 case could never match.
    # NOTE(review): 142 is presumably 128 + SIGALRM(14) from the perl-alarm based
    # timeout() helper defined in before_install - confirm.
    rc=$?; if [ "$rc" -eq 0 ] || [ "$rc" -eq 142 ]; then true; else exit 1; fi;
  - |
    if [[ "$JOB" != "doc" ]]; then exit 0; fi;
    # For document only
    if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi;
    if [[ "$TRAVIS_BRANCH" != "develop"  && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi;
    export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh
    export DOCS_DIR=`pwd`
    cd ..
    curl $DEPLOY_DOCS_SH | bash -s $CONTENT_DEC_PASSWD $TRAVIS_BRANCH $DOCS_DIR $DOCS_DIR/build/doc/
 notifications:
  email:
    on_success: change
--- a/AUTHORS.md
+++ b/AUTHORS.md
@ -4,6 +4,7 @@
 | backyes | Yan-Fei Wang |
 | baiyfbupt | Yi-Fan Bai |
 | beckett1124 | Bin Qi |
 | ChengduoZH | Cheng-Duo Zhao|
 | chengxiaohua1105 | Xiao-Hua Cheng |
 | cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
 | cxysteven | Xing-Yi Cheng |
@ -21,6 +22,7 @@
 | jczaja | Jacek Czaja |
 | JiayiFeng | Jia-Yi Feng |
 | kbinias | Krzysztof Binias |
 | kexinzhao | Ke-Xin Zhao |
 | kuke | Yi-Bing Liu |
 | lcy-seso | Ying Cao |
 | lipeng-unisound | Peng Li |
@ -44,6 +46,7 @@
 | tianbingsz | Tian-Bing Xu |
 | tpatejko | Tomasz Patejko |
 | typhoonzero | Yi Wu |
 | velconia | Qi-Yang Min |
 | wanghaoshuang | Hao-Shuang Wang |
 | wangyang59 | Yang Wang |
 | wangzhen-nlp | Zhen Wang |
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -24,8 +24,10 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
        "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
 message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
        "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 if(WIN32)
    set(CMAKE_STATIC_LIBRARY_PREFIX lib)
 endif(WIN32)
 find_package(Sphinx)
 if(NOT CMAKE_CROSSCOMPILING)
    find_package(CUDA QUIET)
 endif(NOT CMAKE_CROSSCOMPILING)
@ -42,7 +44,6 @@ option(WITH_MKL         "Compile PaddlePaddle with MKL support."        ${AVX_FO
 option(WITH_DSO         "Compile PaddlePaddle with dynamic linked CUDA" ON)
 option(WITH_TESTING     "Compile PaddlePaddle with unit testing"        OFF)
 option(WITH_SWIG_PY     "Compile PaddlePaddle with inference api"       ON)
 option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check"         ON)
 option(WITH_PYTHON      "Compile PaddlePaddle with python interpreter"  ON)
 option(WITH_DOUBLE      "Compile PaddlePaddle with double precision"    OFF)
 option(WITH_RDMA        "Compile PaddlePaddle with RDMA support"        OFF)
@ -57,10 +58,25 @@ option(WITH_FLUID_ONLY  "Compile PaddlePaddle fluid only"               OFF)
 option(WITH_GOLANG      "Compile PaddlePaddle with GOLANG"              OFF)
 option(GLIDE_INSTALL    "Download and install go dependencies "         ON)
 option(USE_NNPACK       "Compile PaddlePaddle with NNPACK library"      OFF)
-option(WITH_DISTRIBUTE  "Compile with grpc distributed support"         OFF)
+option(WITH_DISTRIBUTE  "Compile with distributed support"              OFF)
 option(USE_EIGEN_FOR_BLAS   "Use matrix multiplication in Eigen"        OFF)
 option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen"            OFF)
 option(WITH_ARM_FP16    "Use half precision support on armv8.2-a cpu"   OFF)
 option(WITH_FAST_BUNDLE_TEST    "Bundle tests that can be run in a single process together to reduce launch overhead"   OFF)
 option(WITH_CONTRIB     "Compile the third-party contributation"        OFF)
 option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
 option(WITH_ANAKIN      "Compile with Anakin library"                   OFF)
 option(WITH_GRPC     "Use grpc as the default rpc framework"            ${WITH_DISTRIBUTE})
 option(WITH_BRPC_RDMA     "Use brpc rdma as the rpc protocal"           OFF)
 option(WITH_INFERENCE    "Compile fluid inference library"              ON)
 option(WITH_SYSTEM_BLAS   "Use system blas library"           OFF)
 option(PY_VERSION       "Compile PaddlePaddle with python3 support"     ${PY_VERSION})
 # PY_VERSION
 if(NOT PY_VERSION)
  set(PY_VERSION 2.7)
 endif()
 set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
 # CMAKE_BUILD_TYPE
 if(NOT CMAKE_BUILD_TYPE)
@ -98,9 +114,17 @@ if(ANDROID OR IOS)
    add_definitions(-DPADDLE_MOBILE_INFERENCE)
 endif()
 if (APPLE OR WIN32)
    set(WITH_MKL OFF CACHE STRING
        "Disable MKL for building on mac and windows" FORCE)
 endif()
 set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
  "A path setting third party libraries download & build directories.")
 set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING
  "A path setting fluid shared and static libraries")
 if (WITH_C_API AND WITH_PYTHON)
  message(WARNING "It is suggest not embedded a python interpreter in Paddle "
    "when using C-API. It will give an unpredictable behavior when using a "
@ -118,16 +142,23 @@ else()
 endif()
 set(WITH_MKLML ${WITH_MKL})
-if (WITH_MKL AND AVX2_FOUND)
+if (NOT DEFINED WITH_MKLDNN)
    if (WITH_MKL AND AVX2_FOUND)
        set(WITH_MKLDNN ON)
-else()
+    else()
        message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN")
        set(WITH_MKLDNN OFF)
    endif()
 endif()
 if (REPLACE_ENFORCE_GLOG)
  add_definitions("-DREPLACE_ENFORCE_GLOG")
 endif()
 ########################################################################################
 include(external/mklml)     # download mklml package
 include(external/xbyak)     # download xbyak package
 include(external/libxsmm)   # download, build, install libxsmm
 include(external/zlib)      # download, build, install zlib
 include(external/gflags)    # download, build, install gflags
 include(external/glog)      # download, build, install glog
@ -137,34 +168,68 @@ include(external/python)    # download, build, install python
 include(external/openblas)  # download, build, install openblas
 include(external/mkldnn)    # download, build, install mkldnn
 include(external/swig)      # download, build, install swig
 include(external/warpctc)   # download, build, install warpctc
 include(external/boost)     # download boost
 include(external/any)       # download libn::any
 include(external/eigen)     # download eigen3
 include(external/pybind11)  # download pybind11
 include(external/cares)
-include(external/grpc)
+include(external/cub)
 if (NOT WIN32)
 # there is no official support of snappystream, warpctc, nccl, cupti in windows
 include(external/snappy)    # download snappy
-include(external/snappystream)
+include(external/snappystream) # download snappystream
-include(external/threadpool)
+include(external/warpctc)   # download, build, install warpctc
 include(cupti)
 endif (NOT WIN32)
 if(WITH_DISTRIBUTE)
    if(WITH_GRPC)
        include(external/grpc)
        message(STATUS "Use grpc framework.")
    else()
        message(STATUS "Use brpc framework.")
        include(external/leveldb)
        include(external/brpc)
    endif()
 endif()
 if(WITH_BRPC_RDMA)
    message(STATUS "Use brpc with rdma.")
    if(WITH_GRPC)
        message(FATAL_ERROR "Can't use grpc with brpc rdma.")
    endif()
    if(NOT WITH_DISTRIBUTE)
        message(FATAL_ERROR "Can't use brpc rdma in no distribute env.")
    endif()
 endif()
 include(external/threadpool)
 include(flags)              # set paddle compile flags
 include(cudnn)              # set cudnn libraries, must before configure
 include(cupti)
 include(configure)          # add paddle env configuration
 # GPU-only modules: CUDA, TensorRT and Anakin are pulled in only for GPU builds.
 if(WITH_GPU)
    include(cuda)
    include(tensorrt)
    include(external/anakin)
 else()
    # This must be else(): an argument-less elseif() has an empty condition that
    # never evaluates to true, which would leave WITH_ANAKIN enabled on
    # non-GPU builds.
    set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in GPU only now." FORCE)
 endif()
 include(generic)            # simplify cmake module
 include(package)            # set paddle packages
 include(cpplint)            # set paddle c++ style
 include(ccache)             # set ccache for compilation
 include(util)               # set unittest and link libs
 include(rdma)               # set rdma libraries
 include(flags)              # set paddle compile flags
 include(version)            # set PADDLE_VERSION
 include(coveralls)          # set code coverage
 include(inference_lib)      # add paddle fluid inference libraries
 include_directories("${PADDLE_SOURCE_DIR}")
-include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include")
+include_directories("${PADDLE_SOURCE_DIR}/paddle/legacy/cuda/include")
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
@ -177,11 +242,6 @@ set(EXTERNAL_LIBS
    ${PYTHON_LIBRARIES}
 )
 if(WITH_GPU)
    include(cuda)
    include(tensorrt)
 endif(WITH_GPU)
 if(WITH_AMD_GPU)
    find_package(HIP)
    include(hip)
@ -191,6 +251,10 @@ if(WITH_MKLML)
    list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
 endif()
 if(WITH_LIBXSMM)
    list(APPEND EXTERNAL_LIBS ${LIBXSMM_LIBS})
 endif()
 if(WITH_MKLDNN)
    list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB})
 endif()
@ -202,10 +266,10 @@ endif(USE_NNPACK)
 add_subdirectory(proto)
-if(NOT MOBILE_INFERENCE)
+if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY)
    # "add_subdirectory(go)" should be placed after the following line,
    # because it depends on paddle/optimizer.
-    add_subdirectory(paddle/optimizer)
+    add_subdirectory(paddle/legacy/optimizer)
 endif()
 # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
@ -226,5 +290,7 @@ if(WITH_PYTHON)
 endif()
 if(WITH_DOC)
    find_package(Sphinx REQUIRED)
    find_python_module(recommonmark REQUIRED)
    add_subdirectory(doc)
 endif()
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -58,6 +58,8 @@ PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-
    create mode 100644 233
   ```
 	NOTE: The `yapf` installed by `pip install pre-commit` and `conda install -c conda-forge pre-commit` is slightly different. Paddle developers use `pip install pre-commit`.
 1. Build and test
   Users can build PaddlePaddle natively on Linux and Mac OS X.  But to unify the building environment and to make it easy for debugging, the recommended way is [using Docker](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/build_en.md).
@ -157,4 +159,4 @@ This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the
 - verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
 - verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
 - verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/math)
+- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/math)
--- a/14
+++ b/14
@ -23,13 +23,13 @@ ENV HOME /root
 COPY ./paddle/scripts/docker/root/ /root/
 RUN apt-get update && \
-    apt-get install -y --allow-downgrades \
+    apt-get install -y --allow-downgrades patchelf \
-    git python-pip python-dev openssh-server bison \
+    git python-pip python-dev python-opencv openssh-server bison \
    libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \
    wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
    curl sed grep graphviz libjpeg-dev zlib1g-dev  \
    python-matplotlib gcc-4.8 g++-4.8 \
-    automake locales clang-format swig doxygen cmake  \
+    automake locales clang-format swig cmake  \
    liblapack-dev liblapacke-dev \
    clang-3.8 llvm-3.8 libclang-3.8-dev \
    net-tools libtool ccache && \
@ -70,7 +70,7 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
 # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
 # version (1.7.1 for now), which causes the documentation build to fail.
-RUN pip install --upgrade pip==9.0.3 && \
+RUN easy_install -U pip && \
    pip install -U wheel && \
    pip install -U docopt PyYAML sphinx==1.5.6 && \
    pip install sphinx-rtd-theme==0.1.9 recommonmark
@ -79,6 +79,9 @@ RUN pip install pre-commit 'ipython==5.3.0' && \
    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
    pip install opencv-python
 #For docstring checker
 RUN pip install pylint pytest astroid isort LinkChecker
 COPY ./python/requirements.txt /root/
 RUN pip install -r /root/requirements.txt
@ -101,6 +104,3 @@ RUN echo 'root:root' | chpasswd
 RUN sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config
 RUN sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
 EXPOSE 22
 # development image default do build work
 CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"]
--- a/Dockerfile.android
+++ b/Dockerfile.android
@ -40,5 +40,3 @@ RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \
    unzip -q android-ndk-r14b-linux-x86_64.zip && \
    mv android-ndk-r14b ${ANDROID_NDK_HOME} && \
    rm -rf /opt/android-ndk-tmp
 CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"]
--- a/README.md
+++ b/README.md
@ -4,7 +4,6 @@
 [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
 [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html)
 [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html)
 [![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
 [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
 [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
@ -19,6 +18,22 @@ learning to many products at Baidu.
 Our vision is to enable deep learning for everyone via PaddlePaddle.
 Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
 ### Latest PaddlePaddle Release: [Fluid 0.14.0](https://github.com/PaddlePaddle/Paddle/tree/v0.14.0)
 ### Install Latest Stable Release:
 ```
 # Linux CPU
 pip install paddlepaddle
 # Linux GPU cuda9cudnn7
 pip install paddlepaddle-gpu
 # Linux GPU cuda8cudnn7
 pip install paddlepaddle-gpu==0.14.0.post87
 # Linux GPU cuda8cudnn5
 pip install paddlepaddle-gpu==0.14.0.post85
 # For installation on other platform, refer to http://paddlepaddle.org/
 ```
 ## Features
 - **Flexibility**
@ -62,9 +77,9 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
 ## Installation
 It is recommended to check out the
-[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/docker_install_en.html)
+[Docker installation guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/docker_install_en.html)
 before looking into the
-[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/build_from_source_en.html).
+[build from source guide](http://www.paddlepaddle.org/docs/develop/documentation/fluid/en/build_and_install/build_from_source_en.html).
 ## Documentation
--- a/benchmark/.gitignore
+++ b/benchmark/.gitignore
@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl
 caffe/image/logs
 tensorflow/image/logs
 tensorflow/rnn/logs
 fluid/models/*.pyc
 fluid/logs
 fluid/nohup.out
--- a/benchmark/cluster/README.md
+++ b/benchmark/cluster/README.md
@ -1,196 +0,0 @@
 # Cluster Training Benchmark
 ## Setup
 - Platform
  - Kubernetes: v1.6.2
  - Linux Kernel: v3.10.0
 - Resource
  - CPU: 10 Cores per Pod
  - Memory: 5GB per Pod
 - Docker Image
  We use different base Docker Image to run the benchmark on Kubernetes:
  - PaddlePaddle v2: paddlepaddle/paddle:0.11.0
  - PaddlePaddle Fluid: paddlepaddle/paddle:[commit-id]
  - TensorFlow: tensorflow/tensorflow:1.5.0-rc0
 - Model
  vgg16 is used in this benchmark.
 ## Cases
 - Variable
  - Batch Size of training data.
  - PServer count of the training job.
  - The number of trainers.
 - Invariant
  - The resource of trainer/pserver Pod.
 ### Measure the Performance for Different Batch Size
 - PServer Count: 40
 - Trainer Count: 100
 - Metrics: mini-batch / sec
 <table>
 <thead>
 <tr>
 <th>Batch Size </th>
 <th> 32</th>
 <th>64</th>
 <th>128 </th>
 <th>256</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td> PaddlePaddle Fluid</td>
 <td>-</td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 </tr>
 <tr>
 <td>PaddlePaddle v2  </td>
 <td>-  </td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 </tr>
 <tr>
 <td>TensorFlow </td>
 <td>-  </td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 </tr>
 </tbody>
 </table>
 ### Measure the Performance for Different PServer Count
 - Trainer Count: 100
 - Batch Size: 64
 - Metrics: mini-batch / sec
 <table>
 <thead>
 <tr>
 <th>PServer Count  </th>
 <th>10</th>
 <th>20</th>
 <th>40 </th>
 <th>60</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td> PaddlePaddle Fluid</td>
 <td>-</td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 </tr>
 <tr>
 <td>PaddlePaddle v2  </td>
 <td>-  </td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 </tr>
 <tr>
 <td>TensorFlow </td>
 <td>-  </td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 </tr>
 </tbody>
 </table>
 ### Measure Parallel Efficiency By Increasing Trainer Count
 - PServer Count: 20
 - Batch Size: 64
 - Metrics:
 $S = \frac{T_1}{T_N}$
 where $S$ is the speedup: the ratio of $T_1$ to $T_N$, the training times with 1 and $N$ trainers, respectively.
 The parallel efficiency is:
 $E = \frac{S}{N}$
 <table>
 <thead>
 <tr>
 <th>Trainer Counter  </th>
 <th>1</th>
 <th>10</th>
 <th>20 </th>
 <th>30</th>
 <th>40</th>
 <th>50</th>
 <th>60 </th>
 <th>70</th>
 <th>80</th>
 <th>90</th>
 <th>100 </th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td> PaddlePaddle Fluid</td>
 <td>-</td>
 <td>- </td>
 <td>- </td>
 <td>- </td>
 <td>-</td>
 <td>- </td>
 <td>- </td>
 <td>- </td>
 <td>-</td>
 <td>- </td>
 <td>- </td>
 </tr>
 <tr>
 <td>PaddlePaddle v2  </td>
 <td>-  </td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 <td>-</td>
 <td>- </td>
 <td>- </td>
 <td>- </td>
 <td>-</td>
 <td>- </td>
 <td>- </td>
 </tr>
 <tr>
 <td>TensorFlow </td>
 <td>-  </td>
 <td>- </td>
 <td>-  </td>
 <td>- </td>
 <td>-</td>
 <td>- </td>
 <td>- </td>
 <td>- </td>
 <td>-</td>
 <td>- </td>
 <td>- </td>
 </tr>
 </tbody>
 </table>
 ## Reproduce the benchmark
 TODO
--- a/benchmark/cluster/vgg16/Dockerfile
+++ b/benchmark/cluster/vgg16/Dockerfile
@ -1,35 +0,0 @@
 FROM nvidia/cuda:8.0-cudnn5-runtime-ubuntu16.04
 # you can get mirror list here:
 # https://launchpad.net/ubuntu/+archivemirrors
 ARG UBUNTU_MIRROR
 RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
 RUN apt-get update && apt-get install -y python python-dev python-pip iputils-ping libgtk2.0-dev
 RUN pip install -U kubernetes opencv-python
 RUN pip install paddlepaddle
 # If the network is slow, you may need to add a proxy here.
 # ENV https_proxy=
 RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()" | python'
 RUN pip uninstall -y paddlepaddle
 # Unset the proxy if it was set.
 # ENV https_proxy=""
 # NOTE: By default CI built wheel packages turn WITH_DISTRIBUTE=OFF,
 #       so we must build one with distribute support to install in this image.
 ADD *.whl /
 RUN pip install /*.whl && rm -f /*.whl
 ENV LD_LIBRARY_PATH=/usr/local/lib
 # tf k8s
 RUN pip install tensorflow==1.4.0
 ADD tf_k8s /usr/bin
 RUN chmod +x /usr/bin/tf_k8s
 ADD vgg16_tf.py /workspace/
 # below lines may change a lot for debugging
 ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
 ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
 RUN chmod +x /usr/bin/paddle_k8s
 ADD vgg16_fluid.py vgg16_v2.py /workspace/
--- a/benchmark/cluster/vgg16/README.md
+++ b/benchmark/cluster/vgg16/README.md
@ -1,195 +0,0 @@
 # Performance for Distributed vgg16
 ## Test Result
 ### Hardware Information
 - CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
 - cpu MHz		: 2101.000
 - cache size	: 20480 KB
 ### Blas settings
 Setting environment variable: `MKL_NUM_THREADS=1`.
 ### Single Node Single Thread
 - Metrics: samples / sec
 <table>
 <thead>
 <tr>
 <th>Batch Size </th>
 <th> 32</th>
 <th>64</th>
 <th>128 </th>
 <th>256</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td> PaddlePaddle Fluid</td>
 <td> 15.44 </td>
 <td> 16.32 </td>
 <td> 16.74 </td>
 <td> 16.79 </td>
 </tr>
 <tr>
 <td>PaddlePaddle v2  </td>
 <td> 15.97 </td>
 <td> 17.04 </td>
 <td> 17.60 </td>
 <td> 17.83 </td>
 </tr>
 <tr>
 <td>TensorFlow </td>
 <td> 9.09 </td>
 <td> 9.10 </td>
 <td> 9.24 </td>
 <td> 8.66 </td>
 </tr>
 </tbody>
 </table>
 ### Different Batch Size
 - PServer Count: 10
 - Trainer Count: 20
 - Metrics: samples / sec
 <table>
 <thead>
 <tr>
 <th>Batch Size </th>
 <th> 32</th>
 <th>64</th>
 <th>128 </th>
 <th>256</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td> PaddlePaddle Fluid</td>
 <td> 190.20 </td>
 <td> 222.15 </td>
 <td> 247.40 </td>
 <td> 258.18 </td>
 </tr>
 <tr>
 <td>PaddlePaddle v2  </td>
 <td> 170.96 </td>
 <td> 233.71 </td>
 <td> 256.14 </td>
 <td> 329.23 </td>
 </tr>
 <tr>
 <td>TensorFlow </td>
 <td> - </td>
 <td> - </td>
 <td> - </td>
 <td> - </td>
 </tr>
 </tbody>
 </table>
 ### Accelerate Rate
 - Pserver Count: 20
 - Batch Size: 128
 - Metrics: samples / sec
 <table>
 <thead>
 <tr>
 <th>Trainer Count </th>
 <th>20</th>
 <th>40</th>
 <th>80</th>
 <th>100</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td> PaddlePaddle Fluid</td>
 <td> 263.29 (78.64%) </td>
 <td> 518.80 (77.47%) </td>
 <td> 836.26 (62.44%) </td>
 <td> 1019.29 (60.89%) </td>
 </tr>
 <tr>
 <td>PaddlePaddle v2 (need more tests)   </td>
 <td> 326.85 (92.85%) </td>
 <td> 534.58 (75.93%) </td>
 <td> 853.30 (60.60%) </td>
 <td> 1041.99 (59.20%) </td>
 </tr>
 <tr>
 <td>TensorFlow </td>
 <td> - </td>
 <td> - </td>
 <td> - </td>
 <td> - </td>
 </tr>
 </tbody>
 </table>
 ### Different Pserver Count
 - Trainer Count: 60
 - Batch Size: 128
 - Metrics: samples/ sec
 <table>
 <thead>
 <tr>
 <th>PServer Count </th>
 <th>3</th>
 <th>6</th>
 <th>10</th>
 <th>20</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td> PaddlePaddle Fluid(should fix in next PR) </td>
 <td> 589.1 </td>
 <td> 592.6 </td>
 <td> 656.4 </td>
 <td> 655.8 </td>
 </tr>
 <tr>
 <td>PaddlePaddle v2 (need more tests)   </td>
 <td> 593.4 </td>
 <td> 791.3 </td>
 <td> 729.7 </td>
 <td> 821.7 </td>
 </tr>
 <tr>
 <td>TensorFlow </td>
 <td> - </td>
 <td> - </td>
 <td> - </td>
 <td> - </td>
 </tr>
 </tbody>
 </table>
 *The performance gap between Fluid and v2 comes from the network interference.*
 ## Steps to Run the Performance Test
 1. You must re-compile PaddlePaddle and enable `-DWITH_DISTRIBUTE` to build PaddlePaddle with distributed support.
 1. When the build finishes, copy the output `whl` package located under `build/python/dist` to current directory.
 1. Run `docker build -t [image:tag] .` to build the docker image and run `docker push [image:tag]` to push the image to the repository so kubernetes can find it.
 1. Run `kubectl create -f pserver.yaml && kubectl create -f trainer.yaml` to start the job on your kubernetes cluster (you must configure the `kubectl` client before this step).
 1. Run `kubectl get po` to get running pods, and run `kubectl logs [podID]` to fetch the pod log of pservers and trainers.
 Check the logs for the distributed training progress and analyze the performance.
 ## Enable Verbose Logs
 Edit `pserver.yaml` and `trainer.yaml` and add the environment variables `GLOG_v=3` and `GLOG_logtostderr=1` to see what happened in detail.
--- a/benchmark/cluster/vgg16/fluid_pserver.yaml
+++ b/benchmark/cluster/vgg16/fluid_pserver.yaml
@ -1,72 +0,0 @@
 apiVersion: extensions/v1beta1
 kind: ReplicaSet
 metadata:
  name: vgg16job-pserver
 spec:
  replicas: 10
  template:
    metadata:
      labels:
        paddle-job-pserver: vgg16job
    spec:
      hostNetwork: true
      imagePullSecrets:
      - name: job-registry-secret
      containers:
      - name: pserver
        image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
        imagePullPolicy: Always
        ports:
        - name: jobport-30236
          containerPort: 30236
        env:
        - name: PADDLE_JOB_NAME
          value: vgg16job
        - name: MKL_NUM_THREADS
          value: "1"
        - name: TRAINING_ROLE
          value: "PSERVER"
        - name: TRAINERS
          value: "20"
        - name: PSERVERS
          value: "10"
        - name: TOPOLOGY
          value: ""
        - name: ENTRY
          value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0"
        - name: TRAINER_PACKAGE
          value: "/workspace"
        - name: PADDLE_INIT_PORT
          value: "30236"
        - name: PADDLE_INIT_NICS
          value: "xgbe0"
        - name: PADDLE_INIT_TRAINER_COUNT
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
          value: "1"
        - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
          value: "20"
        - name: PADDLE_INIT_NUM_PASSES
          value: "1"
        - name: PADDLE_INIT_USE_GPU
          value: "0"
        - name: LD_LIBRARY_PATH
          value: "/usr/local/lib:/usr/local/nvidia/lib64"
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: "metadata.namespace"
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: "status.podIP"
        command: ["paddle_k8s", "start_fluid"]
        resources:
          requests:
            memory: 10Gi
            cpu: 4
          limits:
            memory: 10Gi
            cpu: 4
--- a/benchmark/cluster/vgg16/fluid_trainer.yaml
+++ b/benchmark/cluster/vgg16/fluid_trainer.yaml
@ -1,69 +0,0 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: vgg16job-trainer
 spec:
  parallelism: 20
  completions: 20
  template:
    metadata:
      labels:
        paddle-job: vgg16job
    spec:
      imagePullSecrets:
      - name: job-registry-secret
      hostNetwork: true
      containers:
      - name: trainer
        image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
        imagePullPolicy: Always
        command: ["paddle_k8s", "start_fluid"]
        env:
        - name: PADDLE_JOB_NAME
          value: vgg16job
        - name: TRAINING_ROLE
          value: "TRAINER"
        - name: TRAINERS
          value: "20"
        - name: PSERVERS
          value: "10"
        - name: TOPOLOGY
          value: ""
        - name: ENTRY
          value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0 --batch_size 128"
        - name: TRAINER_PACKAGE
          value: "/workspace"
        - name: PADDLE_INIT_PORT
          value: "30236"
        - name: PADDLE_INIT_NICS
          value: "xgbe0"
        - name: PADDLE_INIT_TRAINER_COUNT
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
          value: "1"
        - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
          value: "20"
        - name: PADDLE_INIT_NUM_PASSES
          value: "1"
        - name: PADDLE_INIT_USE_GPU
          value: "0"
        - name: LD_LIBRARY_PATH
          value: "/usr/local/lib:/usr/local/nvidia/lib64"
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: "metadata.namespace"
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: "status.podIP"
        resources:
          requests:
            memory: 40Gi
            cpu: 2
          limits:
            memory: 40Gi
            cpu: 2
      restartPolicy: Never
--- a/benchmark/cluster/vgg16/run_vgg_dist.sh
+++ b/benchmark/cluster/vgg16/run_vgg_dist.sh
@ -1,21 +0,0 @@
 #!/bin/bash
 # Starts one parameter-server process and two trainer processes of the VGG
 # script on the local host (127.0.0.1), all as background jobs.
 # Update VGG_SRC to point to the training source file.
 VGG_SRC="vgg16_fluid.py"
 # Environment for the parameter-server process.
 export TRAINING_ROLE=PSERVER
 export TRAINERS=2
 export POD_IP=127.0.0.1
 export PADDLE_INIT_PORT=6174
 # Launch the parameter server in the background.
 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 &
 # Need to wait for the ps to start first (fixed 10-second delay, no readiness check).
 sleep 10
 echo "done start ps"
 # Environment for the trainer processes; only TRAINING_ROLE differs from above.
 export TRAINING_ROLE=TRAINER
 export TRAINERS=2
 export POD_IP=127.0.0.1
 export PADDLE_INIT_PORT=6174
 # Launch two trainers in the background, each restricted to a single GPU
 # (CUDA devices 4 and 5) and given a distinct --task_index.
 CUDA_VISIBLE_DEVICES=4 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=0 &
 CUDA_VISIBLE_DEVICES=5 MKL_NUM_THREADS=1 python -u ${VGG_SRC} --local 0 --ps_host=127.0.0.1:6174 --trainer_hosts=127.0.0.1:6174 --device=GPU --task_index=1 &
--- a/benchmark/cluster/vgg16/tf_k8s
+++ b/benchmark/cluster/vgg16/tf_k8s
@ -1,82 +0,0 @@
 #!/bin/bash
 # Write a human-readable reason for the trainer's exit status to
 # /dev/termination-log, then exit this script with that same status.
 #   $1 - exit status of the trainer command (treated as 0 when omitted).
 check_trainer_ret() {
  local ret="${1:-0}"
  stdbuf -oL echo "job returned $ret...setting pod return message..."
  stdbuf -oL echo "==============================="
  # Only these four statuses get a message; any other status leaves the
  # termination log untouched.
  case "$ret" in
    136) echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log ;;
    139) echo "Segmentation Fault" > /dev/termination-log ;;
    1)   echo "General Error" > /dev/termination-log ;;
    134) echo "Program Abort" > /dev/termination-log ;;
  esac
  stdbuf -oL echo "termination log wroted..."
  exit "$ret"
 }
 # Endpoint lists shared by the start_tf_* functions; filled in by
 # wait_running_pods from the output of `k8s_tools.py fetch_endpoints`.
 g_pservers=""
 g_trainers=""
 # Block until the expected number of pserver and trainer pods are running,
 # then store their endpoints in g_pservers / g_trainers.
 # Reads JOB_NAME, PSERVERS_NUM, TRAINERS_NUM and PORT from the environment.
 wait_running_pods(){
  local pserver_label="tf-job-pserver=${JOB_NAME}"
  local trainer_label="tf-job-trainer=${JOB_NAME}"
  # Arguments are quoted so that values are never word-split or glob-expanded.
  stdbuf -oL python /root/k8s_tools.py wait_pods_running "${pserver_label}" "${PSERVERS_NUM}"
  stdbuf -oL python /root/k8s_tools.py wait_pods_running "${trainer_label}" "${TRAINERS_NUM}"
  g_pservers=$(python /root/k8s_tools.py fetch_endpoints "${pserver_label}" "${PORT}")
  g_trainers=$(python /root/k8s_tools.py fetch_endpoints "${trainer_label}" "${PORT}")
 }
 # Run ${ENTRY} as a parameter-server task inside ${TRAINER_PACKAGE}.
 # Waits for all pods first, then asks k8s_tools.py for this pod's id and uses
 # it as --task_index. Reads ENTRY, JOB_NAME, TF_JOB_NAME and TRAINER_PACKAGE
 # from the environment.
 start_tf_pserver(){
  wait_running_pods
  local label="tf-job-pserver=${JOB_NAME}"
  # Declared separately from the assignment so `local` does not hide the
  # exit status of the command substitution.
  local pserver_id
  pserver_id=$(python /root/k8s_tools.py fetch_id "${label}")
  local cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
  --job_name=${TF_JOB_NAME} --task_index=${pserver_id}"
  stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
 }
 # Run ${ENTRY} as a worker task inside ${TRAINER_PACKAGE} and hand its exit
 # status to check_trainer_ret, which exits the script.
 # Waits for all pods first, then asks k8s_tools.py for this pod's id and uses
 # it as --task_index. Reads ENTRY, JOB_NAME, TF_JOB_NAME, BATCH_SIZE and
 # TRAINER_PACKAGE from the environment.
 start_tf_trainer(){
  wait_running_pods
  local label="tf-job-trainer=${JOB_NAME}"
  # Declared separately from the assignment so `local` does not hide the
  # exit status of the command substitution.
  local trainer_id
  trainer_id=$(python /root/k8s_tools.py fetch_id "${label}")
  local cmd="${ENTRY} --ps_hosts=${g_pservers} --worker_hosts=${g_trainers} \
  --job_name=${TF_JOB_NAME} --task_index=${trainer_id} --batch_size=${BATCH_SIZE}"
  stdbuf -oL sh -c "cd ${TRAINER_PACKAGE} && ${cmd}"
  check_trainer_ret "$?"
 }
 # Dispatch on TF_JOB_NAME: "worker" starts a trainer, every other value
 # (including unset) starts a parameter server.
 start_tf(){
    case "${TF_JOB_NAME}" in
        worker)
            start_tf_trainer
            ;;
        *)
            start_tf_pserver
            ;;
    esac
 }
 # Print the command-line help text to stdout.
 usage() {
    printf '%s\n' \
        "usage: tf_k8s [<args>]:" \
        "  start_tf         Start tensorflow jobs"
 }
 # Entry point: "start_tf" launches the job; anything else (including --help
 # and no argument at all) prints the usage text.
 if [[ "$1" == "start_tf" ]]; then
    start_tf
 else
    usage
 fi
--- a/benchmark/cluster/vgg16/tf_pserver.yaml
+++ b/benchmark/cluster/vgg16/tf_pserver.yaml
@ -1,56 +0,0 @@
 # ReplicaSet that keeps 10 pods running for the VGG16 TensorFlow benchmark job.
 # Each pod runs `tf_k8s start_tf` with TF_JOB_NAME=ps.
 # NOTE(review): extensions/v1beta1 ReplicaSet is no longer served since
 # Kubernetes 1.16; newer clusters need apps/v1 plus an explicit spec.selector.
 apiVersion: extensions/v1beta1
 kind: ReplicaSet
 metadata:
  name: vgg16job-tf-pserver
 spec:
  replicas: 10
  template:
    metadata:
      labels:
        # tf_k8s builds the selector "tf-job-pserver=${JOB_NAME}" from JOB_NAME
        # below, so this label value must stay equal to JOB_NAME.
        tf-job-pserver: vgg16job-tf
    spec:
      hostNetwork: true
      imagePullSecrets:
      - name: job-registry-secret
      containers:
      - name: pserver
        image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
        imagePullPolicy: Always
        command: ["tf_k8s", "start_tf"]
        ports:
        # NOTE(review): the declared containerPort is 30236 but the PORT env
        # below is "32036" -- looks like a digit transposition; confirm which
        # value is intended.
        - name: jobport-30236
          containerPort: 30236
        env:
        # Port passed by tf_k8s to `k8s_tools.py fetch_endpoints`.
        - name: PORT
          value: "32036"
        # Command that tf_k8s runs inside TRAINER_PACKAGE.
        - name: ENTRY
          value: "python vgg16_tf.py"
        - name: JOB_NAME
          value: vgg16job-tf
        # Number of pserver pods tf_k8s waits for; matches spec.replicas above.
        - name: PSERVERS_NUM
          value: "10"
        # Any value other than "worker" makes tf_k8s start a parameter server.
        - name: TF_JOB_NAME 
          value: "ps"
        # Number of trainer pods tf_k8s waits for.
        - name: TRAINERS_NUM
          value: "20"
        - name: BATCH_SIZE
          value: "128"
        # Directory tf_k8s changes into before running ENTRY.
        - name: TRAINER_PACKAGE
          value: "/workspace"
        - name: NUM_PASSES
          value: "1"
        # Downward API: expose the pod's namespace and IP to the container.
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: "metadata.namespace"
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: "status.podIP"
        # Requests are set equal to limits.
        resources:
          requests:
            memory: 10Gi
            cpu: 4
          limits:
            memory: 10Gi
            cpu: 4
--- a/benchmark/cluster/vgg16/tf_trainer.yaml
+++ b/benchmark/cluster/vgg16/tf_trainer.yaml
@ -1,58 +0,0 @@
 # Job that runs 20 trainer pods in parallel for the VGG16 TensorFlow benchmark.
 # Each pod runs `tf_k8s start_tf` with TF_JOB_NAME=worker and is not restarted
 # on failure (restartPolicy: Never).
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: vgg16job-tf-trainer
 spec:
  parallelism: 20
  completions: 20
  template:
    metadata:
      labels:
        # tf_k8s builds the selector "tf-job-trainer=${JOB_NAME}" from JOB_NAME
        # below, so this label value must stay equal to JOB_NAME.
        tf-job-trainer: vgg16job-tf
    spec:
      imagePullSecrets:
      - name: job-registry-secret
      hostNetwork: true
      containers:
      - name: trainer
        image: "registry.baidu.com/paddlepaddle/fluid_benchmark_tf:vgg16"
        imagePullPolicy: Always
        command: ["tf_k8s", "start_tf"]
        ports:
        # NOTE(review): the declared containerPort is 30236 but the PORT env
        # below is "32036" -- looks like a digit transposition; confirm which
        # value is intended.
        - name: jobport-30236
          containerPort: 30236
        env:
        # Port passed by tf_k8s to `k8s_tools.py fetch_endpoints`.
        - name: PORT
          value: "32036"
        - name: JOB_NAME
          value: vgg16job-tf
        # "worker" makes tf_k8s start a trainer instead of a parameter server.
        - name: TF_JOB_NAME 
          value: "worker"
        # Command that tf_k8s runs inside TRAINER_PACKAGE.
        - name: ENTRY
          value: "python vgg16_tf.py"
        # Number of pserver pods tf_k8s waits for.
        - name: PSERVERS_NUM
          value: "10"
        # Forwarded by tf_k8s to ENTRY as --batch_size.
        - name: BATCH_SIZE
          value: "128"
        # Number of trainer pods tf_k8s waits for; matches parallelism above.
        - name: TRAINERS_NUM
          value: "20"
        # Directory tf_k8s changes into before running ENTRY.
        - name: TRAINER_PACKAGE
          value: "/workspace"
        - name: NUM_PASSES
          value: "1"
        # Downward API: expose the pod's namespace and IP to the container.
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: "metadata.namespace"
        - name: POD_IP
          valueFrom:
            fieldRef:
              fieldPath: "status.podIP"
        # Requests are set equal to limits.
        resources:
          requests:
            memory: 40Gi
            cpu: 2
          limits:
            memory: 40Gi
            cpu: 2
      restartPolicy: Never
--- a/benchmark/cluster/vgg16/v2_pserver.yaml
+++ b/benchmark/cluster/vgg16/v2_pserver.yaml
@ -1,64 +0,0 @@
 # ReplicaSet that keeps 10 parameter-server pods running for the Paddle v2
 # VGG16 benchmark job. Each pod runs `paddle_k8s start_pserver`.
 # NOTE(review): extensions/v1beta1 ReplicaSet is no longer served since
 # Kubernetes 1.16; newer clusters need apps/v1 plus an explicit spec.selector.
 apiVersion: extensions/v1beta1
 kind: ReplicaSet
 metadata:
  name: vgg16v2job-pserver
 spec:
  replicas: 10
  template:
    metadata:
      labels:
        paddle-job-pserver: vgg16v2job
    spec:
      hostNetwork: true
      imagePullSecrets:
      - name: job-registry-secret
      containers:
      - name: pserver
        image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
        imagePullPolicy: Always
        ports:
        # Same port number as PADDLE_INIT_PORT below.
        - name: jobport-30236
          containerPort: 30236
        env:
        - name: PADDLE_JOB_NAME
          value: vgg16v2job
        # TRAINERS / PSERVERS match the trainer Job's parallelism (20) and this
        # ReplicaSet's replicas (10).
        - name: TRAINERS
          value: "20"
        - name: PSERVERS
          value: "10"
        - name: TOPOLOGY
          value: ""
        - name: ENTRY
          value: "python train.py"
        - name: TRAINER_PACKAGE
          value: "/workspace"
        # PADDLE_INIT_* settings; presumably consumed by paddle_k8s / paddle.init
        # inside the image -- TODO confirm against the image's entry script.
        - name: PADDLE_INIT_PORT
          value: "30236"
        - name: PADDLE_INIT_NICS
          value: "xgbe0"
        - name: PADDLE_INIT_TRAINER_COUNT
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
          value: "1"
        - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
          value: "20"
        - name: PADDLE_INIT_NUM_PASSES
          value: "1"
        - name: PADDLE_INIT_USE_GPU
          value: "0"
        - name: LD_LIBRARY_PATH
          value: "/usr/local/lib:/usr/local/nvidia/lib64"
        # Downward API: expose the pod's namespace to the container.
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: "metadata.namespace"
        command: ["paddle_k8s", "start_pserver"]
        # Requests are set equal to limits.
        resources:
          requests:
            memory: 10Gi
            cpu: 4
          limits:
            memory: 10Gi
            cpu: 4
--- a/benchmark/cluster/vgg16/v2_trainer.yaml
+++ b/benchmark/cluster/vgg16/v2_trainer.yaml
@ -1,65 +0,0 @@
 # Job that runs 20 trainer pods in parallel for the Paddle v2 VGG16 benchmark.
 # Each pod runs `paddle_k8s start_trainer v2` and is not restarted on failure
 # (restartPolicy: Never).
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: vgg16v2job-trainer
 spec:
  parallelism: 20
  completions: 20
  template:
    metadata:
      labels:
        paddle-job: vgg16v2job
    spec:
      imagePullSecrets:
        - name: job-registry-secret
      hostNetwork: true
      containers:
      - name: trainer
        image: "registry.baidu.com/paddlepaddle/fluid_benchmark:vgg16"
        imagePullPolicy: Always
        command: ["paddle_k8s", "start_trainer", "v2"]
        env:
        - name: PADDLE_JOB_NAME
          value: vgg16v2job
        # vgg16_v2.py reads BATCH_SIZE (falling back to 128 when unset) and
        # TRAINERS from the environment.
        - name: BATCH_SIZE
          value: "256"
        - name: TRAINERS
          value: "20"
        - name: PSERVERS
          value: "10"
        - name: TOPOLOGY
          value: ""
        - name: ENTRY
          value: "cd /workspace && MKL_NUM_THREADS=1 python /workspace/vgg16_v2.py"
        - name: TRAINER_PACKAGE
          value: "/workspace"
        # PADDLE_INIT_* settings; presumably consumed by paddle_k8s / paddle.init
        # inside the image -- TODO confirm against the image's entry script.
        - name: PADDLE_INIT_PORT
          value: "30236"
        - name: PADDLE_INIT_NICS
          value: "xgbe0"
        - name: PADDLE_INIT_TRAINER_COUNT
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM
          value: "1"
        - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
          value: "1"
        - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
          value: "20"
        # NOTE(review): vgg16_v2.py passes num_passes=200 to trainer.train and
        # the pserver manifest sets this to "1" -- confirm which value applies.
        - name: PADDLE_INIT_NUM_PASSES
          value: "2"
        - name: PADDLE_INIT_USE_GPU
          value: "0"
        - name: LD_LIBRARY_PATH
          value: "/usr/local/lib:/usr/local/nvidia/lib64"
        # Downward API: expose the pod's namespace to the container.
        - name: NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: "metadata.namespace"
        # Requests are set equal to limits.
        resources:
          requests:
            memory: 40Gi
            cpu: 2
          limits:
            memory: 40Gi
            cpu: 2
      restartPolicy: Never
--- a/benchmark/cluster/vgg16/vgg16_fluid.py
+++ b/benchmark/cluster/vgg16/vgg16_fluid.py
--- a/benchmark/cluster/vgg16/vgg16_tf.py
+++ b/benchmark/cluster/vgg16/vgg16_tf.py
--- a/benchmark/cluster/vgg16/vgg16_v2.py
+++ b/benchmark/cluster/vgg16/vgg16_v2.py
@ -1,154 +0,0 @@
 #  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 #Licensed under the Apache License, Version 2.0 (the "License");
 #you may not use this file except in compliance with the License.
 #You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 #Unless required by applicable law or agreed to in writing, software
 #distributed under the License is distributed on an "AS IS" BASIS,
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #See the License for the specific language governing permissions and
 #limitations under the License.
 import gzip
 import paddle.v2.dataset.cifar as cifar
 import paddle.v2 as paddle
 import time
 import os
 DATA_DIM = 3 * 32 * 32
 CLASS_DIM = 10
 BATCH_SIZE = os.getenv("BATCH_SIZE")
 if BATCH_SIZE:
    BATCH_SIZE = int(BATCH_SIZE)
 else:
    BATCH_SIZE = 128
 print "batch_size", BATCH_SIZE
 NODE_COUNT = int(os.getenv("TRAINERS"))
 ts = 0
 def vgg(input, nums, class_dim):
    """Build a VGG-style classifier on top of `input`.
    Five convolution groups (64, 128, 256, 512, 512 filters) are followed by
    two 512-unit ReLU layers with drop_rate=0.5 and a softmax output layer.
    Args:
        input: layer fed to the first convolution group (created with
            num_channels=3).
        nums: sequence of exactly five ints; nums[i] is the number of
            convolutions in group i.
        class_dim: size of the final softmax layer.
    Returns:
        The softmax output layer.
    """
    def conv_group(src, width, depth, in_channels=None):
        # `depth` 3x3 ReLU convolutions of `width` filters, then 2x2 max pooling.
        return paddle.networks.img_conv_group(
            input=src,
            num_channels=in_channels,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[width] * depth,
            conv_filter_size=3,
            conv_act=paddle.activation.Relu(),
            pool_type=paddle.pooling.Max())
    def dropout_fc(src, width):
        # Fully connected ReLU layer with drop_rate=0.5.
        return paddle.layer.fc(
            input=src,
            size=width,
            act=paddle.activation.Relu(),
            layer_attr=paddle.attr.Extra(drop_rate=0.5))
    assert len(nums) == 5
    widths = [64, 128, 256, 512, 512]
    # the channel of input feature is 3
    features = conv_group(input, widths[0], nums[0], 3)
    for group_idx in range(1, 5):
        features = conv_group(features, widths[group_idx], nums[group_idx])
    hidden_dim = 512
    hidden = dropout_fc(features, hidden_dim)
    hidden = dropout_fc(hidden, hidden_dim)
    return paddle.layer.fc(
        input=hidden, size=class_dim, act=paddle.activation.Softmax())
 def vgg13(input, class_dim):
    """Build the network via vgg() with two convolutions in each of the five groups."""
    return vgg(input, [2] * 5, class_dim)
 def vgg16(input, class_dim):
    """Build the network via vgg() with 2, 2, 3, 3, 3 convolutions per group."""
    return vgg(input, [2, 2] + [3] * 3, class_dim)
 def vgg19(input, class_dim):
    """Build the network via vgg() with 2, 2, 4, 4, 4 convolutions per group."""
    return vgg(input, [2, 2] + [4] * 3, class_dim)
 def main():
    global ts
    paddle.init(use_gpu=False)
    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(DATA_DIM))
    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(CLASS_DIM))
    extra_layers = None
    # NOTE: for v2 distributed training need averaging updates.
    learning_rate = 1e-3 / NODE_COUNT
    out = vgg16(image, class_dim=CLASS_DIM)
    cost = paddle.layer.classification_cost(input=out, label=lbl)
    # Create parameters
    parameters = paddle.parameters.create(cost)
    # Create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                         BATCH_SIZE),
        learning_rate=learning_rate / BATCH_SIZE,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=128000 * 35,
        learning_rate_schedule="discexp", )
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            cifar.train10(),
            # To use other data, replace the above line with:
            # reader.train_reader('train.list'),
            buf_size=1000),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        cifar.test10(),
        # To use other data, replace the above line with:
        # reader.test_reader('val.list'),
        batch_size=BATCH_SIZE)
    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer,
                                 extra_layers=extra_layers,
                                 is_local=False)
    # End batch and end pass event handler
    def event_handler(event):
        global ts, ts_pass
        if isinstance(event, paddle.event.BeginPass):
            ts_pass = time.time()
        if isinstance(event, paddle.event.BeginIteration):
            ts = time.time()
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 1 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s, spent: %f" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    time.time() - ts)
        if isinstance(event, paddle.event.EndPass):
            print "Pass %d end, spent: %f" % (event.pass_id,
                                              time.time() - ts_pass)
            result = trainer.test(reader=test_reader)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
    trainer.train(
        reader=train_reader, num_passes=200, event_handler=event_handler)
 if __name__ == '__main__':
    main()
--- a/Show More
+++ b/Show More