Merge branch 'develop' of https://github.com/baidu/Paddle into benchmark_cfg_doc

9 years ago · b1cc9da4b7
parent 9d377f0996 85f0e18460
commit b1cc9da4b7
791 changed files with 37038 additions and 19626 deletions
--- a/.clang-format
+++ b/.clang-format
@ -13,8 +13,6 @@
 # The document of clang-format is 
 #   http://clang.llvm.org/docs/ClangFormat.html
 #   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
 ---
 Language:        Cpp
 BasedOnStyle:  Google
@ -22,8 +20,9 @@ IndentWidth:     2
 TabWidth:        2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -2  # The private/protected/public has no indent in class
-PointerAlignment: Left    # int* p/int& p, not int *p/int &p
 Standard:  Cpp11 
 AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
 ...

--- a/.gitignore
+++ b/.gitignore
@ -5,4 +5,6 @@ build/
 .vscode
 .idea
 .project
+.cproject
 .pydevproject
+Makefile
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,20 @@
+-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
+    sha: c25201a00e6b0514370501050cf2a8538ac12270
+    hooks:
+    -   id: remove-crlf
+-   repo: https://github.com/reyoung/mirrors-yapf.git
+    sha: v0.13.2
+    hooks:
+    -   id: yapf
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
+    hooks:
+    -   id: check-added-large-files
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: detect-private-key
+    -   id: end-of-file-fixer
+-   repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
+    sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
+    hooks:
+    -   id: clang-formater
--- a/.style.yapf
+++ b/.style.yapf
@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
--- a/.travis.yml
+++ b/.travis.yml
@ -35,11 +35,22 @@ addons:
      - libgoogle-glog-dev
      - libgflags-dev
      - libgtest-dev
+      - curl
+      - lcov
      - graphviz
+      - swig
 before_install:
+  - |
+    if [ ${JOB} == "BUILD_AND_TEST" ]; then
+      if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
+      then
+        echo "Only markdown docs were updated, stopping build process."
+        exit
+      fi
+    fi
  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
-  - pip install wheel protobuf sphinx breathe recommonmark
+  - pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy
 script:
  - paddle/scripts/travis/main.sh
 notifications:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,14 +2,14 @@ cmake_minimum_required(VERSION 2.8)

 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
-set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b2)
+set(PADDLE_MINOR_VERSION 9)
+set(PADDLE_PATCH_VERSION 0a0)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})

 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
 set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
 include(package)
-include(swig)
+find_package(SWIG 2.0)
 find_package(CUDA QUIET)
 find_package(Protobuf REQUIRED)
 find_package(PythonLibs 2.7 REQUIRED)
@ -40,6 +40,9 @@ option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
 option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
 option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
 option(ON_TRAVIS "Running test on travis-ci or not." OFF)
+option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
+option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
+
 if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING 
        "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
@ -49,11 +52,16 @@ endif()
 include(enableCXX11)
 include(cpplint)
 include(ccache)
+if(WITH_RDMA)
+  include(rdma)
+endif()
 include(util)
 include(flags)
 include(cudnn)
 include(FindPythonModule)
 include(check_packages)
+include(swig)
+include(coveralls)

 # add PaddlePaddle version
 if(DEFINED ENV{PADDLE_VERSION})
@ -87,11 +95,24 @@ if(NOT WITH_GPU)
    add_definitions(-DHPPL_STUB_FUNC)
    list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
 else()
+    if(${CUDA_VERSION_MAJOR} GREATER 6)
+        if(COMPILER_SUPPORT_CXX11)
+            LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
+        endif()
+    endif()
+
    # TODO(yuyang18): Change it to remove std=c++11 in cuda compile.
    set(CUDA_PROPAGATE_HOST_FLAGS OFF)
    if(NOT CUDNN_FOUND)
        message(FATAL_ERROR "Paddle need cudnn to compile")
    endif()
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")
+
+    if(WITH_AVX)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
+    else(WITH_AVX)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
+    endif(WITH_AVX)

    if(WITH_DSO)
        set(CUDA_LIBRARIES "")
@ -115,11 +136,11 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)

 if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
 else(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
 endif(WITH_AVX)

 if(WITH_PYTHON)
@ -129,12 +150,15 @@ else(WITH_PYTHON)
    add_definitions(-DPADDLE_NO_PYTHON)
 endif(WITH_PYTHON)

-if(NOT WITH_RDMA)
-    add_definitions(-DPADDLE_DISABLE_RDMA)
-endif()
+if(WITH_RDMA)
+  include_directories("${RDMA_INC_DIR}")
+else(WITH_RDMA)
+  add_definitions(-DPADDLE_DISABLE_RDMA)
+endif(WITH_RDMA)

 if(WITH_GLOG)
    add_definitions(-DPADDLE_USE_GLOG)
+    include_directories(${LIBGLOG_INCLUDE_DIR})
 endif()

 if(WITH_GFLAGS)
--- a/ISSUE_TEMPLATE.md
+++ b/ISSUE_TEMPLATE.md
@ -0,0 +1,14 @@
+Thank you for contributing to PaddlePaddle. Submitting an issue is a great help for us.
+Both Chinese and English issues are welcome.
+
+It's hard to solve a problem when important details are missing.
+Before submitting the issue, please check your request against the following criteria.
+
+- [ ] Was a similar issue submitted or resolved before? You can search the existing issues on GitHub.
+- [ ] Did you search for your issue with widespread search engines?
+- [ ] Is my description of the issue clear enough to reproduce this problem?
+   * If some errors occurred, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc.
+   * If you use an [asciinema](https://asciinema.org/) recording to show what you are doing to make it happen, that's awesome! We could help you solve the problem more quickly.
+- [ ] Does my description of the issue use GitHub markdown correctly?
+   * Please use the proper markdown syntax for styling all forms of writing, e.g., source code, error information, etc.
+   * Check out [this page](https://guides.github.com/features/mastering-markdown/) to find out much more about markdown.
--- a/README.md
+++ b/README.md
@ -1,8 +1,13 @@
 # PaddlePaddle

-|  **`Linux`**   | **`License`** | **`Chat Room`** |
-|----------------|---------------|-----------------|
-|[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)|[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)|[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)|
+
+[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
+[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+

 Welcome to the PaddlePaddle GitHub.

@ -12,7 +17,7 @@ developed by Baidu scientists and engineers for the purpose of applying deep
 learning to many products at Baidu.

 Our vision is to enable deep learning for everyone via PaddlePaddle.
-Please refer to our [release log](https://github.com/baidu/Paddle/releases) to track the latest feature of PaddlePaddle. 
+Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.

 ## Features

@ -24,15 +29,15 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t
    connection.

 -  **Efficiency**
-  
+
    In order to unleash the power of heterogeneous computing resource,
    optimization occurs at different levels of PaddlePaddle, including
    computing, memory, architecture and communication. The following are some
    examples:

      - Optimized math operations through SSE/AVX intrinsics, BLAS libraries
-      (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels. 
-      - Highly optimized recurrent networks which can handle **variable-length** 
+      (e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
+      - Highly optimized recurrent networks which can handle **variable-length**
      sequence without padding.
      - Optimized local and distributed training for models with high dimensional
      sparse data.
@ -55,41 +60,39 @@ Please refer to our [release log](https://github.com/baidu/Paddle/releases) to t

 ## Installation
 Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
-pre-built packages (**docker image**, **deb package**) or 
+pre-built packages (**docker image**, **deb package**) or
 directly build on **Linux** and **Mac OS X** from the source code.
- 
+
 ## Documentation
 Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.

 - [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
   You can follow the quick start tutorial to learn how to use PaddlePaddle
   step-by-step.
-    
+
 - [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
   We provide five demos, including: image classification, sentiment analysis,
-   sequence to sequence model, recommendation, semantic role labeling. 
-   
+   sequence to sequence model, recommendation, semantic role labeling.
+
 - [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
  This system supports training deep learning models on multiple machines
  with data parallelism.
-   
+
 - [Python API](http://paddlepaddle.org/doc/ui/) <br>
   PaddlePaddle supports using either Python interface or C++ to build your
   system. We also use SWIG to wrap C++ source code to create a user friendly
   interface for Python. You can also use SWIG to create interface for your
   favorite programming language.
- 
+
 - [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
   We sincerely appreciate your interest and contributions. If you would like to
-   contribute, please read the contribution guide.   
+   contribute, please read the contribution guide.

 - [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>

 ## Ask Questions
-Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
-**paddle-dev@baidu.com** to ask questions and talk about methods and models.
-Framework development discussions and
-bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
+
+You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues).

 ## Copyright and License
 PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
--- a/benchmark/README.md
+++ b/benchmark/README.md
@ -5,11 +5,11 @@ Machine:
 - CPU: 12-core Intel(R) Xeon(R) CPU E5-2620 v2 @2.10GHz
 - GPU: Tesla K40m
 - cuDNN: v5.1
- system: Docker 1.12.1, all platform are tested in docker environment.
+- system: Docker 1.12.1, all platforms are tested in docker environment.

 Platforms: 

- PaddlePaddle: 
+- PaddlePaddle: paddledev/paddle:gpu-devel-v0.9.0a0 
 - Tensorflow: gcr.io/tensorflow/tensorflow:0.11.0rc0-gpu 
 - Caffe: kaixhin/cuda-caffe

@ -28,7 +28,7 @@ AlexNet, GoogleNet and a small network used in Caffe.
 - [SmallNet](https://github.com/BVLC/caffe/blob/master/examples/cifar10/cifar10\_quick\_train\_test.prototxt)


-### Singe-GPU
+### Single-GPU

 - AlexNet:  input - 3 * 227 * 227,  Time: ms/batch

@ -61,7 +61,7 @@ All platforms use cuDNN-v5.1. We see that caffe is slower in this experiment, be

 **Notation**

-All the experiments in caffe use `caffe time` to execute, which does not include the time of parameter updating. The time in PaddlePaddle and TensorFlow contains it. But, compared with the total time, the time of parameter updating is relatively little.
+All the experiments in caffe use `caffe time` to execute, which does not include the time of parameter updating, while the times measured for PaddlePaddle and TensorFlow do include it. However, compared with the total time, the time of parameter updating is relatively small on a single machine.

 In Tensorflow, they implement algorithm searching method instead of using the algorithm searching interface in cuDNN.

@ -106,7 +106,7 @@ We use lstm network for text classfication to test benchmark.
 - Dictionary size=30000 
 - Peephole connection is used in `lstmemory` by default in PaddlePaddle. It is also configured in TensorFlow.

-### Single GPU
+### Single-GPU

 #### LSTM in Text Classification

--- a/benchmark/paddle/image/alexnet.py
+++ b/benchmark/paddle/image/alexnet.py
@ -2,56 +2,63 @@

 from paddle.trainer_config_helpers import *

-height=227
-width=227
+height = 227
+width = 227
 num_class = 1000
-batch_size = get_config_arg('batch_size', int, 128) 
-
-args={'height':height, 'width':width, 'color':True, 'num_class':num_class}
-define_py_data_sources2("train.list",
-                        None,
-                        module="provider",
-                        obj="process",
-                        args=args)
+batch_size = get_config_arg('batch_size', int, 128)

+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+    "train.list", None, module="provider", obj="process", args=args)

 settings(
-    batch_size = batch_size,
-    learning_rate = 0.01 / batch_size,
-    learning_method = MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * batch_size)
-)
-
+    batch_size=batch_size,
+    learning_rate=0.01 / batch_size,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * batch_size))

 # conv1
 net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(input=net, filter_size=11, num_channels=3,
-      num_filters=96, stride=4, padding=1)
+net = img_conv_layer(
+    input=net,
+    filter_size=11,
+    num_channels=3,
+    num_filters=96,
+    stride=4,
+    padding=1)
 net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
-net = img_pool_layer(input=net, pool_size=3, stride=2) 
+net = img_pool_layer(input=net, pool_size=3, stride=2)

 # conv2
-net = img_conv_layer(input=net, filter_size=5, num_filters=256,
-      stride=1, padding=2, groups=1)
+net = img_conv_layer(
+    input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1)
 net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
 net = img_pool_layer(input=net, pool_size=3, stride=2)

 # conv3
-net = img_conv_layer(input=net, filter_size=3, num_filters=384,
-      stride=1, padding=1)
+net = img_conv_layer(
+    input=net, filter_size=3, num_filters=384, stride=1, padding=1)
 # conv4
-net = img_conv_layer(input=net, filter_size=3, num_filters=384,
-      stride=1, padding=1, groups=1)
+net = img_conv_layer(
+    input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1)

 # conv5
-net = img_conv_layer(input=net, filter_size=3, num_filters=256,
-      stride=1, padding=1, groups=1)
+net = img_conv_layer(
+    input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1)
 net = img_pool_layer(input=net, pool_size=3, stride=2)

-net = fc_layer(input=net, size=4096, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5))
-net = fc_layer(input=net, size=4096, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(
+    input=net,
+    size=4096,
+    act=ReluActivation(),
+    layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(
+    input=net,
+    size=4096,
+    act=ReluActivation(),
+    layer_attr=ExtraAttr(drop_rate=0.5))
 net = fc_layer(input=net, size=1000, act=SoftmaxActivation())

 lab = data_layer('label', num_class)
-loss = cross_entropy(input=net, label=lab) 
+loss = cross_entropy(input=net, label=lab)
 outputs(loss)
--- a/benchmark/paddle/image/googlenet.py
+++ b/benchmark/paddle/image/googlenet.py
--- a/benchmark/paddle/image/provider.py
+++ b/benchmark/paddle/image/provider.py
@ -1,13 +1,14 @@
-import io,os
+import io, os
 import random
 import numpy as np
 from paddle.trainer.PyDataProvider2 import *

+
 def initHook(settings, height, width, color, num_class, **kwargs):
-    settings.height = height 
-    settings.width = width 
-    settings.color = color 
-    settings.num_class = num_class 
+    settings.height = height
+    settings.width = width
+    settings.color = color
+    settings.num_class = num_class
    if settings.color:
        settings.data_size = settings.height * settings.width * 3
    else:
@ -15,7 +16,9 @@ def initHook(settings, height, width, color, num_class, **kwargs):

    settings.slots = [dense_vector(settings.data_size), integer_value(1)]

-@provider(init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+
+@provider(
+    init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, file_list):
    with open(file_list, 'r') as fdata:
        for line in fdata:
--- a/benchmark/paddle/image/smallnet_mnist_cifar.py
+++ b/benchmark/paddle/image/smallnet_mnist_cifar.py
@ -2,42 +2,44 @@

 from paddle.trainer_config_helpers import *

-height=32
-width=32
+height = 32
+width = 32
 num_class = 10

-batch_size = get_config_arg('batch_size', int, 128) 
+batch_size = get_config_arg('batch_size', int, 128)

-args={'height':height, 'width':width, 'color':True, 'num_class':num_class}
-define_py_data_sources2("train.list",
-                        None,
-                        module="provider",
-                        obj="process",
-                        args=args)
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+    "train.list", None, module="provider", obj="process", args=args)

 settings(
-    batch_size = batch_size,
-    learning_rate = 0.01 / batch_size,
-    learning_method = MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * batch_size)
-)
-
+    batch_size=batch_size,
+    learning_rate=0.01 / batch_size,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * batch_size))

 # conv1
 net = data_layer('data', size=height * width * 3)
-net = img_conv_layer(input=net, filter_size=5, num_channels=3,
-                     num_filters=32, stride=1, padding=2)
+net = img_conv_layer(
+    input=net,
+    filter_size=5,
+    num_channels=3,
+    num_filters=32,
+    stride=1,
+    padding=2)
 net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1)

 # conv2
-net = img_conv_layer(input=net, filter_size=5, num_filters=32,
-                     stride=1, padding=2)
-net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+net = img_conv_layer(
+    input=net, filter_size=5, num_filters=32, stride=1, padding=2)
+net = img_pool_layer(
+    input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())

 # conv3
-net = img_conv_layer(input=net, filter_size=3, num_filters=64,
-                     stride=1, padding=1)
-net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+net = img_conv_layer(
+    input=net, filter_size=3, num_filters=64, stride=1, padding=1)
+net = img_pool_layer(
+    input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())

 net = fc_layer(input=net, size=64, act=ReluActivation())
 net = fc_layer(input=net, size=10, act=SoftmaxActivation())
--- a/benchmark/paddle/rnn/imdb.py
+++ b/benchmark/paddle/rnn/imdb.py
@ -4,6 +4,7 @@ import gzip
 import os
 import numpy

+
 def get_dataset_file(dataset, default_dataset, origin):
    data_dir, data_file = os.path.split(dataset)
    if (not os.path.isfile(dataset)) and data_file == default_dataset:
@ -13,13 +14,14 @@ def get_dataset_file(dataset, default_dataset, origin):

    return dataset

+
 def create_data(path="imdb.pkl"):

    if (not os.path.isfile('imdb.train.pkl')):
        path = get_dataset_file(
            path, "imdb.pkl",
            "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
-    
+
        if path.endswith(".gz"):
            f = gzip.open(path, 'rb')
        else:
@ -35,8 +37,10 @@ def create_data(path="imdb.pkl"):
    if (not os.path.isfile('train.list')):
        file('train.list', 'w').write('imdb.train.pkl\n')

+
 def main():
    create_data('imdb.pkl')

+
 if __name__ == "__main__":
    main()
--- a/benchmark/paddle/rnn/provider.py
+++ b/benchmark/paddle/rnn/provider.py
@ -1,19 +1,25 @@
-import io,os
+import io, os
 import random
 import numpy as np
 import six.moves.cPickle as pickle
 from paddle.trainer.PyDataProvider2 import *

+
 def remove_unk(x, n_words):
    return [[1 if w >= n_words else w for w in sen] for sen in x]

+
 # ==============================================================
 #  tensorflow uses fixed length, but PaddlePaddle can process
 #  variable-length. Padding is used in benchmark in order to
 #  compare with other platform. 
 # ==============================================================
-def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post',
-                  truncating='post', value=0.):
+def pad_sequences(sequences,
+                  maxlen=None,
+                  dtype='int32',
+                  padding='post',
+                  truncating='post',
+                  value=0.):
    lengths = [len(s) for s in sequences]

    nb_samples = len(sequences)
@ -43,12 +49,14 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post',
 def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs):
    settings.vocab_size = vocab_size
    settings.pad_seq = pad_seq
-    settings.maxlen = maxlen 
+    settings.maxlen = maxlen
    settings.input_types = [
-        integer_value_sequence(vocab_size),
-        integer_value(2)]
+        integer_value_sequence(vocab_size), integer_value(2)
+    ]
+

-@provider(init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+@provider(
+    init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, file):
    f = open(file, 'rb')
    train_set = pickle.load(f)
@ -57,8 +65,8 @@ def process(settings, file):

    # remove unk, namely remove the words out of dictionary
    x = remove_unk(x, settings.vocab_size)
-    if settings.pad_seq: 
+    if settings.pad_seq:
        x = pad_sequences(x, maxlen=settings.maxlen, value=0.)

    for i in range(len(y)):
-        yield map(int,x[i]), int(y[i])
+        yield map(int, x[i]), int(y[i])
--- a/benchmark/paddle/rnn/rnn.py
+++ b/benchmark/paddle/rnn/rnn.py
@ -6,33 +6,29 @@ import imdb
 num_class = 2
 vocab_size = 30000
 fixedlen = 100
-batch_size = get_config_arg('batch_size', int, 128) 
-lstm_num = get_config_arg('lstm_num', int, 1) 
-hidden_size = get_config_arg('hidden_size', int, 128) 
+batch_size = get_config_arg('batch_size', int, 128)
+lstm_num = get_config_arg('lstm_num', int, 1)
+hidden_size = get_config_arg('hidden_size', int, 128)
 # whether to pad sequence into fixed length
 pad_seq = get_config_arg('pad_seq', bool, True)
 imdb.create_data('imdb.pkl')

-args={'vocab_size':vocab_size, 'pad_seq':pad_seq, 'maxlen':fixedlen}
-define_py_data_sources2("train.list",
-                        None,
-                        module="provider",
-                        obj="process",
-                        args=args)
+args = {'vocab_size': vocab_size, 'pad_seq': pad_seq, 'maxlen': fixedlen}
+define_py_data_sources2(
+    "train.list", None, module="provider", obj="process", args=args)

 settings(
    batch_size=batch_size,
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25
-)
+    gradient_clipping_threshold=25)

 net = data_layer('data', size=vocab_size)
 net = embedding_layer(input=net, size=128)

 for i in xrange(lstm_num):
-    net = simple_lstm(input=net, size=hidden_size) 
+    net = simple_lstm(input=net, size=hidden_size)

 net = last_seq(input=net)
 net = fc_layer(input=net, size=2, act=SoftmaxActivation())
--- a/benchmark/tensorflow/image/alexnet.py
+++ b/benchmark/tensorflow/image/alexnet.py
--- a/benchmark/tensorflow/image/alexnet_multi_gpu.py
+++ b/benchmark/tensorflow/image/alexnet_multi_gpu.py
--- a/benchmark/tensorflow/image/googlenet.py
+++ b/benchmark/tensorflow/image/googlenet.py
--- a/benchmark/tensorflow/image/googlenet_multi_gpu.py
+++ b/benchmark/tensorflow/image/googlenet_multi_gpu.py
--- a/benchmark/tensorflow/image/smallnet_mnist_cifar.py
+++ b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
--- a/benchmark/tensorflow/rnn/README.md
+++ b/benchmark/tensorflow/rnn/README.md
@ -1,5 +1,5 @@
 You should also install tflearn:

 ```bash
-pip install tflearn
+pip install -r requirements.txt
 ```
--- a/benchmark/tensorflow/rnn/reader.py
+++ b/benchmark/tensorflow/rnn/reader.py
@ -8,14 +8,13 @@ import tflearn
 from tflearn.data_utils import to_categorical, pad_sequences
 from tflearn.datasets import imdb

-
 FLAGS = tf.app.flags.FLAGS

+
 class DataSet(object):
    def __init__(self, data, labels):
        assert data.shape[0] == labels.shape[0], (
-            'data.shape: %s labels.shape: %s' % (data.shape,
-                                                 labels.shape))
+            'data.shape: %s labels.shape: %s' % (data.shape, labels.shape))
        self._num_examples = data.shape[0]

        self._data = data
@ -64,8 +63,11 @@ class DataSet(object):
 def create_datasets(file_path, vocab_size=30000, val_fraction=0.0):

    # IMDB Dataset loading
-    train, test, _ = imdb.load_data(path=file_path, n_words=vocab_size,
-                                valid_portion=val_fraction, sort_by_len=False)
+    train, test, _ = imdb.load_data(
+        path=file_path,
+        n_words=vocab_size,
+        valid_portion=val_fraction,
+        sort_by_len=False)
    trainX, trainY = train
    testX, testY = test

--- a/benchmark/tensorflow/rnn/requirements.txt
+++ b/benchmark/tensorflow/rnn/requirements.txt
@ -0,0 +1 @@
+tflearn
--- a/benchmark/tensorflow/rnn/rnn.py
+++ b/benchmark/tensorflow/rnn/rnn.py
--- a/Show More
+++ b/Show More