Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into add-GRUOp-dev

7 years ago · 9162629b04
parent b87eabae56 ef2f0ec968
commit 9162629b04
338 changed files with 14535 additions and 2981 deletions
--- a/.gitignore
+++ b/.gitignore
@ -28,3 +28,4 @@ cmake_install.cmake
 paddle/.timestamp
 python/paddlepaddle.egg-info/
 paddle/pybind/pybind.h
+python/paddle/v2/framework/tests/tmp/*
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -127,6 +127,7 @@ include(external/warpctc)   # download, build, install warpctc
 include(external/any)       # download libn::any
 include(external/eigen)     # download eigen3
 include(external/pybind11)    # download pybind11
+include(external/nccl)

 include(cudnn)              # set cudnn libraries, must before configure
 include(configure)          # add paddle env configuration
@ -159,7 +160,7 @@ set(EXTERNAL_LIBS
 if(WITH_GPU)
    list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
    if(NOT WITH_DSO)
-        list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+        list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
    endif(NOT WITH_DSO)
 endif(WITH_GPU)

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -1 +1,157 @@
-./doc/howto/dev/contribute_to_paddle_en.md
+# Contribute Code
+
+We sincerely appreciate your contribution.  This document explains our workflow and work style.
+
+## Workflow
+
+PaddlePaddle uses this [Git branching model](http://nvie.com/posts/a-successful-git-branching-model/).  The following steps guide usual contributions.
+
+1. Fork
+
+   Our development community has been growing fastly; it doesn't make sense for everyone to write into the official repo.  So, please file Pull Requests from your fork.  To make a fork,  just head over to the GitHub page and click the ["Fork" button](https://help.github.com/articles/fork-a-repo/).
+
+1. Clone
+
+   To make a copy of your fork to your local computers, please run
+
+   ```bash
+   git clone https://github.com/your-github-account/paddle
+   cd paddle
+   ```
+
+1. Create the local feature branch
+
+   For daily works like adding a new feature or fixing a bug, please open your feature branch before coding:
+
+   ```bash
+   git checkout -b my-cool-stuff
+   ```
+
+1. Commit
+
+   Before issuing your first `git commit` command, please install [`pre-commit`](http://pre-commit.com/) by running the following commands:
+
+   ```bash
+   pip install pre-commit
+   pre-commit install
+   ```
+
+   Our pre-commit configuration requires clang-format 3.8 for auto-formating C/C++ code and yapf for Python.
+
+   Once installed, `pre-commit` checks the style of code and documentation in every commit.  We will see something like the following when you run `git commit`:
+
+   ```
+   ➜  git commit
+   CRLF end-lines remover...............................(no files to check)Skipped
+   yapf.................................................(no files to check)Skipped
+   Check for added large files..............................................Passed
+   Check for merge conflicts................................................Passed
+   Check for broken symlinks................................................Passed
+   Detect Private Key...................................(no files to check)Skipped
+   Fix End of Files.....................................(no files to check)Skipped
+   clang-formater.......................................(no files to check)Skipped
+   [my-cool-stuff c703c041] add test file
+    1 file changed, 0 insertions(+), 0 deletions(-)
+    create mode 100644 233
+   ```
+
+1. Build and test
+
+   Users can build PaddlePaddle natively on Linux and Mac OS X.  But to unify the building environment and to make it easy for debugging, the recommended way is [using Docker](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/build_en.md).
+
+1. Keep pulling
+
+   An experienced Git user pulls from the official repo often -- daily or even hourly, so they notice conflicts with others work early, and it's easier to resolve smaller conflicts.
+
+   ```bash
+   git remote add upstream https://github.com/PaddlePaddle/Paddle
+   git pull upstream develop
+   ```
+
+1. Push and file a pull request
+
+   You can "push" your local work into your forked repo:
+
+   ```bash
+   git push origin my-cool-stuff
+   ```
+
+   The push allows you to create a pull request, requesting owners of this [official repo](https://github.com/PaddlePaddle/Paddle) to pull your change into the official one.
+
+   To create a pull request, please follow [these steps](https://help.github.com/articles/creating-a-pull-request/).
+
+   If your change is for fixing an issue, please write ["Fixes <issue-URL>"](https://help.github.com/articles/closing-issues-using-keywords/) in the description section of your pull request.  Github would close the issue when the owners merge your pull request.
+
+   Please remember to specify some reviewers for your pull request.  If you don't know who are the right ones, please follow Github's recommendation.
+
+
+1. Delete local and remote branches
+
+   To keep your local workspace and your fork clean, you might want to remove merged branches:
+
+   ```bash
+   git push origin :my-cool-stuff
+   git checkout develop
+   git pull upstream develop
+   git branch -d my-cool-stuff
+   ```
+
+### Code Review
+
+-  Please feel free to ping your reviewers by sending them the URL of your pull request via IM or email.  Please do this after your pull request passes the CI.
+
+- Please answer reviewers' every comment.  If you are to follow the comment, please write "Done"; please give a reason otherwise.
+
+- If you don't want your reviewers to get overwhelmed by email notifications, you might reply their comments by [in a batch](https://help.github.com/articles/reviewing-proposed-changes-in-a-pull-request/).
+
+- Reduce the unnecessary commits.  Some developers commit often.  It is recommended to append a sequence of small changes into one commit by running `git commit --amend` instead of `git commit`.
+
+
+## Coding Standard
+
+### Code Style
+
+Our C/C++ code follows the [Google style guide](http://google.github.io/styleguide/cppguide.html).
+
+Our Python code follows the [PEP8 style guide](https://www.python.org/dev/peps/pep-0008/).
+
+Our build process helps to check the code style.  In [`build.sh`](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/paddle/scripts/docker/build.sh#L42), the entry point of our [builder Docker image](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/Dockerfile#L88), the CMake argument `WITH_STYLE_CHECK` is set to `ON` by default.  This flag is on
+
+Please install pre-commit, which automatically reformat the changes to C/C++ and Python code whenever we run `git commit`.  To check the whole codebase, we can run the command `pre-commit run -a`, as in the [`check_style.sh` file](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/paddle/scripts/travis/check_style.sh#L30), which is invoked by [our Travis CI configuration](https://github.com/PaddlePaddle/Paddle/blob/b84e8226514b8bb4405c3c28e54aa5077193d179/.travis.yml#L43).
+
+### Unit Tests
+
+Please remember to add related unit tests.
+
+- For C/C++ code, please follow [`google-test` Primer](https://github.com/google/googletest/blob/master/googletest/docs/Primer.md).
+
+- For Python code, please use [Python's standard `unittest` package](http://pythontesting.net/framework/unittest/unittest-introduction/).
+
+
+### Writing Logs
+
+We use [glog](https://github.com/google/glog) for logging in our C/C++ code.
+
+For general information, please use `LOG`.  For debug information, please use [`VLOG`](http://htmlpreview.github.io/?https://github.com/google/glog/blob/master/doc/glog.html#verbose).  The reason is at [here](https://groups.google.com/a/chromium.org/d/msg/chromium-dev/3NDNd1KzXeY/AZKMMx37fdQJ).
+
+`VLOG` requires a *verbose level* parameter.  For example:
+
+```c++
+VLOG(3) << "Operator FC is taking " << num_inputs << "inputs."
+```
+
+When we run a PaddlePaddle application or test, we can specify a verbose threshold.  For example:
+
+```bash
+GLOG_vmodule=buddy_allocator=2 \
+GLOG_v=10 \
+python \
+../python/paddle/v2/framework/tests/test_recurrent_op.py
+```
+
+This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the verbose range of 0 to 3, so you will see above example VLOG message, which is in level 3.  This suggests that we output overall messages in lower verbose levels, so they display with higher probability.  When coding C++, please follow the verbose level convention as follows:
+
+- verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework)
+- verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators)
+- verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform)
+- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/math)
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@ -62,11 +62,11 @@ else()
    FIND_PACKAGE(CUDA REQUIRED)

    if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
-        message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
+        message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
    endif()

    if(NOT CUDNN_FOUND)
-        message(FATAL_ERROR "Paddle need cudnn to compile")
+        message(FATAL_ERROR "Paddle needs cudnn to compile")
    endif()

    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
--- a/cmake/external/eigen.cmake
+++ b/cmake/external/eigen.cmake
@ -8,7 +8,7 @@ ExternalProject_Add(
    extern_eigen3
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY  "https://github.com/RLovelett/eigen.git"
-    GIT_TAG         4e79cb69b9425f5f8c3a84be4350d4ab75b5fd9d
+    GIT_TAG         70661066beef694cadf6c304d0d07e0758825c10
    PREFIX          ${EIGEN_SOURCE_DIR}
    UPDATE_COMMAND  ""
    CONFIGURE_COMMAND ""
--- a/cmake/external/nccl.cmake
+++ b/cmake/external/nccl.cmake
@ -0,0 +1,49 @@
+include(ExternalProject)
+
+set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
+
+include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
+
+if(WITH_DSO)
+  # If we use DSO, we do not build nccl, just download the dependencies
+  set(NCCL_BUILD_COMMAND "")
+  set(NCCL_INSTALL_COMMAND "")
+  set(NCCL_INSTALL_DIR "")
+else()
+  # otherwise, we build nccl and link it.
+  set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
+  # Note: cuda 8.0 is needed to make nccl
+  # When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root
+  set(NCCL_BUILD_COMMAND "make -j 8")
+  set(NCCL_INSTALL_COMMAND  "make install PREFIX=${NCCL_INSTALL_DIR}")
+endif()
+
+ExternalProject_Add(
+    extern_nccl
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    GIT_REPOSITORY  "https://github.com/NVIDIA/nccl.git"
+    GIT_TAG         "v1.3.4-1"
+    PREFIX          "${NCCL_SOURCE_DIR}"
+    UPDATE_COMMAND  ""
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND     "${NCCL_BUILD_COMMAND}"
+    INSTALL_COMMAND   "${NCCL_INSTALL_COMMAND}"
+    INSTALL_DIR       "${NCCL_INSTALL_DIR}"
+    TEST_COMMAND      ""
+)
+
+if(WITH_DSO)
+  if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
+    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
+    file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
+    add_library(nccl STATIC ${dummyfile})
+  else()
+    add_library(nccl INTERFACE)
+  endif()
+else()
+  add_library(nccl STATIC IMPORTED GLOBAL)
+  set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
+               ${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
+endif()
+
+add_dependencies(nccl extern_nccl)
--- a/doc/design/graph_survey.md
+++ b/doc/design/graph_survey.md
@ -0,0 +1,232 @@
+## Survey on Graph
+
+Neural network framework often provides symbolic API for users to write network topology conveniently. This doc manily focus on symbolic API in most popular neural network frameworks, and try to find out how to parse symbolic configuration to a portable file, such as protobuf or json.
+
+### Mxnet
+
+The core concept of symbolic API is `Symbol`. Mxnet implements `Symbol` class in C++, and export to Python using C-API. Please refer to the comments in Mxnet:
+
+
+`Symbol` is help class used to represent the operator node in Graph.
+`Symbol` acts as an interface for building graphs from different components like Variable, Functor and Group. `Symbol` is also exported to python front-end (while Graph is not) to enable quick test and deployment. Conceptually, symbol is the final operation of a graph and thus including all the information required (the graph) to evaluate its output value.
+
+
+A simple network topology wrote by Symbol is as follows:
+
+```python
+def get_symbol(num_classes=10, **kwargs):
+    data = mx.symbol.Variable('data')
+    data = mx.symbol.Flatten(data=data)
+    fc1  = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
+    act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
+    fc2  = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
+    act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
+    fc3  = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes)
+    mlp  = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')
+    return mlp
+```
+
+
+
+Varible here is actually a Symbol. Every basic Symbol will correspond to one Node, and every Node has its own NodeAttr. There is a op field in NodeAttr class, when a Symbol represents Variable(often input data), the op field is null.
+
+Symbol contains a data member, std::vector<NodeEntry> outputs, and NodeEntry cantains a poniter to Node. We can follow the Node pointer to get all the Graph.
+
+And Symbol can be saved to a Json file.
+
+Here is a detailed example:
+
+```
+>>> import mxnet as mx
+>>> data = mx.symbol.Variable('data')
+>>> print data.debug_str()
+Variable:data
+
+>>> data = mx.symbol.Flatten(data=data)
+>>> print data.debug_str()
+Symbol Outputs:
+	output[0]=flatten0(0)
+Variable:data
+--------------------
+Op:Flatten, Name=flatten0
+Inputs:
+	arg[0]=data(0) version=0
+
+>>> fc1  = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
+>>> print fc1.debug_str()
+Symbol Outputs:
+	output[0]=fc1(0)
+Variable:data
+--------------------
+Op:Flatten, Name=flatten0
+Inputs:
+	arg[0]=data(0) version=0
+Variable:fc1_weight
+Variable:fc1_bias
+--------------------
+Op:FullyConnected, Name=fc1
+Inputs:
+	arg[0]=flatten0(0)
+	arg[1]=fc1_weight(0) version=0
+	arg[2]=fc1_bias(0) version=0
+Attrs:
+	num_hidden=128
+
+```
+
+
+### TensorFlow
+
+
+The core concept of symbolic API is `Tensor`. Tensorflow defines `Tensor` in Python. Please refer to the comments in TensorFlow:
+
+A `Tensor` is a symbolic handle to one of the outputs of an `Operation`. It does not hold the values of that operation's output, but instead provides a means of computing those values in a TensorFlow [Session](https://www.tensorflow.org/api_docs/python/tf/Session).
+
+A simple example is as follows:
+
+```python
+  # Build a dataflow graph.
+  c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
+  d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
+  e = tf.matmul(c, d)
+
+  # Construct a `Session` to execute the graph.
+  sess = tf.Session()
+
+  # Execute the graph and store the value that `e` represents in `result`.
+  result = sess.run(e)
+```
+
+  
+The main method of `Tensor` is as follows: 
+ 
+ 
+```python
+@property
+def op(self):
+  """The `Operation` that produces this tensor as an output."""
+  return self._op
+
+@property
+def dtype(self):
+   """The `DType` of elements in this tensor."""
+  return self._dtype
+
+@property
+def graph(self):
+  """The `Graph` that contains this tensor."""
+  return self._op.graph
+
+@property
+def name(self):
+  """The string name of this tensor."""
+  if not self._op.name:
+    raise ValueError("Operation was not named: %s" % self._op)
+  return "%s:%d" % (self._op.name, self._value_index)
+
+@property
+def device(self):
+  """The name of the device on which this tensor will be produced, or None."""
+  return self._op.device
+```
+
+
+Tensor can be taken as target to run by session. Tensor contains all the information of Graph, and tracks data dependency.
+
+
+Here is a detailed example:
+
+
+```
+>>> import tensorflow as tf
+>>> c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
+>>> print c.graph
+<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
+>>> d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
+>>> print d.graph
+<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
+>>> e = tf.matmul(c, d)
+>>> print e.graph
+<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
+```
+
+### Dynet
+
+
+The core concept of symbolic API is `Expression`, and Dynet defines `Expression` class in C++.
+
+
+A simple example is as follows:
+
+```cpp
+ComputationGraph cg;
+Expression W = parameter(cg, pW);
+
+Expression in = input(cg, xs[i]);
+Expression label = input(cg, ys[i]);
+Expression pred = W * in;
+Expression loss = square(pred - label);
+```
+
+The input data and parameter are also represented by Expression. Every basci Expression corresponds to a Node. And input data is also a Node. 
+
+Expression has a data member ComputationGraph, and ComputationGraph will be modified in users' configuring process. Expression can be a running target, beacuse Expression contains all dependency.
+
+
+Here is a detailed example:
+
+write topology in C++
+
+```
+ComputationGraph cg;
+Expression W = parameter(cg, pW);
+cg.print_graphviz();
+
+Expression pred = W * xs[i];
+cg.print_graphviz();
+
+Expression loss = square(pred - ys[i]);
+cg.print_graphviz();
+```
+
+compile and print
+
+```
+# first print
+digraph G {
+  rankdir=LR;
+  nodesep=.05;
+  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
+}
+# second print
+digraph G {
+  rankdir=LR;
+  nodesep=.05;
+  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
+  N1 [label="v1 = v0 * -0.98"];
+  N0 -> N1;
+}
+# third print
+digraph G {
+  rankdir=LR;
+  nodesep=.05;
+  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
+  N1 [label="v1 = v0 * -0.98"];
+  N0 -> N1;
+  N2 [label="v2 = -1.88387 - v1"];
+  N1 -> N2;
+  N3 [label="v3 = -v2"];
+  N2 -> N3;
+  N4 [label="v4 = square(v3)"];
+  N3 -> N4;
+}
+```
+
+### Conclusion
+
+
+Actually, Symbol/Tensor/Expression in Mxnet/TensorFlow/Dynet are the same level concepts. We use a unified name Expression here, this level concept has following features:
+
+- Users wirte topoloy with symbolic API, and all return value is Expression, including input data and parameter.
+- Expression corresponds with a global Graph, and Expression can also be composed.
+- Expression tracks all dependency and can be taken as a run target
--- a/doc/design/model_format.md
+++ b/doc/design/model_format.md
@ -0,0 +1,36 @@
+# Design Doc: Model Format
+
+## Motivation
+
+A model is an output of the training process. One complete model consists of two parts, the **topology** and the **parameters**. In order to support industrial deployment, the model format must be self-complete and must not expose any training source code.
+
+As a result, In PaddlePaddle, the **topology** is represented as a  [ProgramDesc](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/doc/design/program.md), which describes the model structure. The **parameters** contain all the trainable weights in the model. We must support large size parameters and efficient serialization/deserialization of parameters. 
+
+## Implementation
+
+The topology is saved as a plain text in a detailed self-contain protobuf file. 
+
+The parameters are saved as a binary file. As we all know, the protobuf message has a limit of [64M size](https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream#CodedInputStream.SetTotalBytesLimit.details). We have done a [benchmark experiment](https://github.com/PaddlePaddle/Paddle/pull/4610), which shows that protobuf is not fit for the task.
+
+As a result, we design a particular format for tensor serialization. By default, an arbitrary tensor in Paddle is a [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/lod_tensor.md), and has a description information proto of [LoDTensorDesc](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L99). We save the DescProto as the byte string header. It contains all the necessary information, such as the `dims`, and the `LoD` information in [LoDTensor](https://github.com/PaddlePaddle/Paddle/blob/1c0a4c901c9fc881d120249c703b15d1c50dae7d/paddle/framework/lod_tensor.md). A tensor stores values in a continuous memory buffer. For speed we dump the raw memory to disk and save it as the byte string content. So, the binary format of one tensor is, 
+
+The table below shows a tensor's byte view in detail. Note that all the signed values are written in the little-endian format.
+
+|field name  | type | description |
+| --- | --- | --- |
+| version | uint32_t | Version of saved file. Always 0 now. |
+| tensor desc length | uint32_t | TensorDesc(Protobuf message) length in bytes. |
+| tensor desc | void* | TensorDesc protobuf binary message |
+| tensor data | void* | Tensor's data in binary format. The length of `tensor_data` is decided by `TensorDesc.dims()` and `TensorDesc.data_type()` |
+| lod_level | uint64_t | Level of LoD |
+| length of lod[0] | uint64_t | [Optional] length of lod[0] in bytes. |
+| data of lod[0] | uint64_t*  | [Optional] lod[0].data() |
+| ... | ... | ... |
+
+
+
+## Summary
+
+- We introduce a model format.
+- The model represented by its forward-pass computation procedure is saved in a **ProgramDesc** protobuf message.
+- A bunch of specified format binary tensors describe the **parameters**.
--- a/doc/design/optimizer.md
+++ b/doc/design/optimizer.md
@ -65,20 +65,6 @@ class Optimizer(object):
    def __init__(self):
        pass

-    def create_backward_pass(self, loss, parameter_list=None):
-        """
-        create and add gradient Operators in BlockDesc to Compute gradients of `loss`
-        for parameters in parameter_list
-
-        Args:
-          loss: an variable generated by cost function.
-          parameter_list: parameters that need to compute gradient and update to optimize the lost.
-
-        Returns:
-          list of (parameters, gradients) pair.
-        """
-        return None
-
    def create_optimization_pass(self, parameters_and_grads):
        """Add optimization operators to update gradients to variables.

@ -93,7 +79,7 @@ class Optimizer(object):
    def minimize(self, loss, parameter_list):
        """Add operations to minimize `loss` by updating `parameter_list`.

-        This method combines interface `create_backward_pass()` and
+        This method combines interface `append_backward_ops()` and
        `create_optimization_pass()` into one.
        """
        params_grads = self.create_backward_pass(loss, parameter_list)
--- a/doc/design/regularization.md
+++ b/doc/design/regularization.md
@ -1,7 +1,7 @@
 # Regularization in PaddlePaddle

 ## Introduction to Regularization
-A central problem in machine learning is how to design an algorithm that will perform well not just on the training data, but also on new data. Many strategies are used by machine learning practitioners to reduce the test error, possibly at the expense of increased training error. These strategies are collectively known as **regularization**. 
+A central problem in machine learning is how to design an algorithm that will perform well not just on the training data, but also on new data. A frequently faced problem is the problem of **overfitting**, where the model does not make reliable predictions on new unseen data. **Regularization** is the process of introducing additional information in order to prevent overfitting. This is usually done by adding extra penalties to the loss function that restricts the parameter spaces that an optimization algorithm can explore.

 ### Parameter Norm Penalties
 Most common regularization approaches in deep learning are based on limiting the capacity of the models by adding a parameter norm penalty to the objective function `J`. This is given as follows:
@ -18,52 +18,21 @@ The most commonly used norm penalties are the L2 norm penalty and the L1 norm pe
 ##### L1 Regularization
 <img src="./images/l1_regularization.png" align="center"/><br/>

-A much more detailed mathematical background of reguilarization can be found [here](http://www.deeplearningbook.org/contents/regularization.html).
+A much more detailed mathematical background of regularization can be found [here](http://www.deeplearningbook.org/contents/regularization.html).

+## Regularization Survey

-## How to do Regularization in PaddlePaddle
-
-On surveying existing frameworks like Tensorflow, PyTorch, Caffe, etc, it can be seen that there are 2 common approaches of doing regularization:
-
-1. Making regularization a part of the optimizer using an attribute like `weight_decay` that is used to control the scale of the L2 Penalty. This approach is used in PyTorch as follows:
-	```python
-	opt =  torch.optim.SGD(params, lr=0.2, weight_decay=0.2)
-	```
-    At every optimization step, this code will add the gradient of the L2 Norm of the params to the gradient of the params with respect to the loss function. This can seen in the following code snippet:
-    ```python
-    if weight_decay != 0:
-    	d_p.add_(weight_decay, p.data)
-    ```
-    This is a very restyrictive way of doing regularization and does not give the users enough flexibility. 
-    
-    **Advantages**:
-    -  It is easy to implement for us.
-    -  Faster execution of backward. However, it can be done manually by advanced users too.
-
-	**Disadvantages**:
-    - Not flexible for other regularizations such as L1/L0 regularization.
-    - Does not allow for different regularization coefficient for different parameters. For example, in most models, ony the weight matrices are regularized and the bias vectors are unregularized.
-    - Tightly coupled optimizer and regularization implementation. 
-
-
-2. Adding regularization ops to the graph through Python API. This approach is used by Tensorflow and Caffe. Using this approach, we manually add regularization ops to the graph and then add the regularization loss to the final loss function before sending them to the optimizer.
-
-	**Advantages**:
-    - Allows for greater flexibility to the users of Paddle. Using this approach, the users can put different regularization to different parameters and also choose parameters that are not a part of regularization.
-    - Makes it easy for the users to customize and extend the framework. 
-
-	**Disadvantages**:
-    - Implementation requires comprehensive design and time. 
+A detailed survey of regularization in various deep learning frameworks can be found [here](https://github.com/PaddlePaddle/Paddle/wiki/Regularization-Survey). 

 ## Proposal for Regularization in PaddlePaddle

 ### Low-Level implementation

-In the new design, we propose to create new operations for regularization. For now, we can add 2 ops thgat correspond to the most frequently used regularizations:
+In the new design, we propose to create new operations for regularization. For now, we can add 2 ops that correspond to the most frequently used regularizations:
 - L2_regularization_op
 - L1_regularization_op

-These ops can be like any other ops with their own CPU/GPU implementations either using Eigen or separate Cpu and GPU kernels. As the initial implementation, we can implement their kernels using Eigen following the abstraction pattern implemented for [Activation Ops](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/accuracy_op.h). This abstraction pattern can make it very easy to implement new regularization schemes. other than L1 and L2 norm penalties. 
+These ops can be like any other ops with their own CPU/GPU implementations either using Eigen or separate CPU and GPU kernels. As the initial implementation, we can implement their kernels using Eigen following the abstraction pattern implemented for [Activation Ops](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/accuracy_op.h). This abstraction pattern can make it very easy to implement new regularization schemes other than L1 and L2 norm penalties. 

 The idea of building ops for regularization is in sync with the refactored Paddle philosophy of using operators to represent any computation unit. The way these ops will be added to the computation graph, will be decided by the [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) in Python API. 

@ -94,7 +63,7 @@ Since we want to create the regularization ops in a lazy manner, the regularizat

 #### High-level API

-In PaddlePaddle Python API, users will primarily rely on [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) to create neural network layers. Hence, we lso need to provide regularization functionality in layer functions. The design of these APIs can be postponed for later right now. A good reference for these APIs can be found in [Keras](https://keras.io/regularizers/) and also by looking at Tensorflow in [`tf.contrib.layers`](https://www.tensorflow.org/api_guides/python/contrib.layers).
+In PaddlePaddle Python API, users will primarily rely on [layer functions](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/python_api.md#layer-function) to create neural network layers. Hence, we also need to provide regularization functionality in layer functions. The design of these APIs can be postponed for later right now. A good reference for these APIs can be found in [Keras](https://keras.io/regularizers/) and also by looking at Tensorflow in [`tf.contrib.layers`](https://www.tensorflow.org/api_guides/python/contrib.layers).



--- a/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md
+++ b/doc/howto/cross_compiling/cross_compiling_for_raspberry_cn.md
@ -1,39 +1,36 @@
 # 构建Raspberry Pi平台上的PaddlePaddle库

-对于Rasspberry Pi系统，用户可通过ssh等方式登录到Raspberry Pi系统上，按照[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档所述，直接编译Raspberry Pi平台上适用的PaddlePaddle库。
+通常有两个方法来构建基于 Rasspberry Pi 的版本：

-用户也可以在自己熟悉的开发平台上，通过交叉编译的方式来编译。这篇文档将以Linux x86-64平台为例，介绍交叉编译Raspberry Pi平台上适用的PaddlePaddle的方法和步骤。
+1. 通过ssh等方式登录到Raspberry Pi系统上来构建。所需的开发工具和第三方库可以参考 [`/Dockerfile`](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile)。

-## 准备交叉编译环境
+1. 另一个方法是交叉编译。这篇文档介绍在 Linux/x64 上交叉编译Raspberry Pi平台上适用的PaddlePaddle的方法和步骤。

-从源码交叉编译PaddlePaddle，用户需要提前准备好交叉编译环境。用户可自行前往[github](https://github.com/raspberrypi/tools)下载Raspberry Pi平台使用的C/C++交叉编译工具链，也可通过以下命令获取：
+## 安装交叉编译器
+
+克隆下面 Github repo

 ```bash
 git clone https://github.com/raspberrypi/tools.git
 ```

-该github仓库中包含若干个预编译好的、针对不同平台的编译工具。宿主机是Linux x86-64环境，则需选用`arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`下的作为编译工具，所使用的编译器为arm-linux-gnueabihf-gcc 4.8.3。
-
-注意，该编译工具链需要系统glibc支持2.14以上。
+即可在 `./tools/tree/master/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64` 目录里找到交叉编译器 arm-linux-gnueabihf-gcc 4.8.3。运行该编译工具链需要一台 Linux x64 机器上以及 2.14版本以上的 glibc。

 ## 配置交叉编译参数

-CMake系统对交叉编译提供了支持[cmake-toolchains](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。为了简化cmake配置，PaddlePaddle为交叉编译提供了工具链配置文档[cmake/cross_compiling/raspberry_pi.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake)，以提供一些默认的编译器和编译参数相关配置。
+CMake[支持交叉编译](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling)。PaddlePaddle for Raspberry Pi的配置信息在[cmake/cross_compiling/raspberry_pi.cmake](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake)。

 交叉编译Raspberry Pi版本PaddlePaddle库时，有一些必须配置的参数：

- `CMAKE_SYSTEM_NAME`，CMake编译的目标平台，必须配置为`RPi`。在设置`CMAKE_SYSTEM_NAME=RPi`后，PaddlePaddle的CMake系统才认为在是在交叉编译Raspberry Pi系统的版本，并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及目标机版OpenBLAS库。
-
-Raspberry Pi平台可选配置参数：
+- `CMAKE_SYSTEM_NAME`：CMake编译的目标平台，必须配置为`RPi`。在设置`CMAKE_SYSTEM_NAME=RPi`后，PaddlePaddle的CMake系统才认为在是在交叉编译Raspberry Pi系统的版本，并自动编译宿主机版protoc可执行文件、目标机版protobuf库、以及目标机版OpenBLAS库。

- `RPI_TOOLCHAIN`，编译工具链所在的绝对路径，或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动设置需要使用的交叉编译器；否则，用户需要在cmake时手动设置这些值。无默认值。
- `RPI_ARM_NEON`，是否使用NEON指令。目前必须设置成`ON`，默认值为`ON`。
+- `RPI_TOOLCHAIN`：编译工具链所在的绝对路径，或者相对于构建目录的相对路径。PaddlePaddle的CMake系统将根据该值自动设置需要使用的交叉编译器；否则，用户需要在cmake时手动设置这些值。无默认值。

-其他配置参数：
+- `RPI_ARM_NEON`：是否使用NEON指令。目前必须设置成`ON`，默认值为`ON`。

 - `HOST_C/CXX_COMPILER`，宿主机的C/C++编译器。在编译宿主机版protoc可执行文件和目标机版OpenBLAS库时需要用到。默认设置成环境变量`CC`的值；若环境变量`CC`没有设置，则设置成`cc`编译器。

-cmake参数如下；
+一个常用的CMake配置如下：

 ```
 cmake -DCMAKE_SYSTEM_NAME=RPi \
@ -47,7 +44,9 @@ cmake -DCMAKE_SYSTEM_NAME=RPi \
      ..
 ```

-用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小，可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`；若希望最快的执行速度，则可设置`CMAKE_BUILD_TYPE`为`Release`。亦可以通过手动设置`CMAKE_C/CXX_FLAGS_MINSIZEREL/RELEASE`来影响PaddlePaddle的编译过程。
+其中`WITH_C_API=ON`表示需要构建推理库。
+
+用户还可根据自己的需求设置其他编译参数。比如希望最小化生成的库的大小，可以设置`CMAKE_BUILD_TYPE`为`MinSizeRel`；若希望最快的执行速度，则可设置`CMAKE_BUILD_TYPE`为`Release`。

 ## 编译和安装

@ -60,6 +59,4 @@ make install

 注意：如果你曾经在源码目录下编译过其他平台的PaddlePaddle库，请先使用`rm -rf`命令删除`third_party`目录和`build`目录，以确保所有的第三方依赖库和PaddlePaddle代码都是针对新的CMake配置重新编译的。

-执行完安装命令后，由于上一步cmake配置中`WITH_C_API`设置为`ON`，`your/path/to/install`目录中会包含`include`和`lib`目录，其中`include`中包含C-API的头文件，`lib`中包含一个Raspberry Pi版本的库。
-
-更多的编译配置见[源码编译PaddlePaddle](http://www.paddlepaddle.org/doc_cn/getstarted/build_and_install/cmake/build_from_source_cn.html)相关文档。
+执行完安装命令后，，`your/path/to/install`目录中会包含`include`和`lib`目录，其中`include`中包含C-API的头文件，`lib`中包含一个Raspberry Pi版本的库。
--- a/doc/howto/cross_compiling/cross_compiling_for_raspberry_en.md
+++ b/doc/howto/cross_compiling/cross_compiling_for_raspberry_en.md
@ -0,0 +1,62 @@
+# Build PaddlePaddle for Raspberry Pi
+
+You may use any of the following two approaches to build the inference library of PaddlePaddle for Raspberry Pi:
+
+1. Build using SSH: Log in to a Raspberry Pi using SSH and build the library. The required development tools and third-party dependencies are listed in here: [`/Dockerfile`](https://github.com/PaddlePaddle/Paddle/blob/develop/Dockerfile).
+
+1. Cross-compile: We talk about how to cross-compile PaddlePaddle for Raspberry Pi on a Linux/x64 machine, in more detail in this article.
+
+## The Cross-Compiling Toolchain
+
+Step 1. Clone the Github repo by running the following command.
+
+```bash
+git clone https://github.com/raspberrypi/tools.git
+```
+
+Step 2. Use the pre-built cross-compiler found in `./tools/tree/master/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64`.  To run it on a Linux computer, glibc version >= 2.14 is needed.
+
+## CMake Arguments
+
+CMake supports [cross-compiling](https://cmake.org/cmake/help/v3.0/manual/cmake-toolchains.7.html#cross-compiling).  All CMake configuration arguments required for the cross-compilation for Raspberry Pi can be found in [`cmake/cross_compiling/raspberry_pi.cmake`](https://github.com/PaddlePaddle/Paddle/blob/develop/cmake/cross_compiling/raspberry_pi.cmake).
+
+Some important arguments that need to be set:
+
+- `CMAKE_SYSTEM_NAME`: The target platform.  Must be `RPi`.
+
+- `RPI_TOOLCHAIN`: The absolute path of the cross-compiling toolchain.
+
+- `RPI_ARM_NEON`: Use ARM NEON Intrinsics. This is a required argument and set default to `ON`.
+
+- `HOST_C/CXX_COMPILER`: The C/C++ compiler for the host.  It is used to build building tools running on the host, for example, protoc.
+
+A commonly-used CMake configuration is as follows:
+
+```
+cmake -DCMAKE_SYSTEM_NAME=RPi \
+      -DRPI_TOOLCHAIN=your/path/to/arm-bcm2708/gcc-linaro-arm-linux-gnueabihf-raspbian-x64 \
+      -DRPI_ARM_NEON=ON \
+      -DCMAKE_INSTALL_PREFIX=your/path/to/install \
+      -DWITH_GPU=OFF \
+      -DWITH_C_API=ON \
+      -DWITH_PYTHON=OFF \
+      -DWITH_SWIG_PY=OFF \
+      ..
+```
+
+To build the inference library, please set the argument WITH_API to ON: `WITH_C_API=ON`.
+
+You can add more arguments. For example, to minimize the size of the generated inference library, you may use `CMAKE_BUILD_TYPE=MinSizeRel`. For performance optimization, you may use `CMAKE_BUILD_TYPE=Release`.
+
+## Build and Install
+
+The following commands build the inference library of PaddlePaddle for Raspberry Pi and third-party dependencies.
+
+```bash
+make
+make install
+```
+
+ The intermediate files will be stored in `build`. Third-party libraries will be located in `build/third_party`. If you have already built it for other platforms like Android or iOS, you may want to clear these directories by running the command: `rm -rf build`.
+
+The infernece library will be in `your/path/to/install/lib`, with related header files in `your/path/to/install/include`.
--- a/doc/howto/dev/contribute_to_paddle_en.md
+++ b/doc/howto/dev/contribute_to_paddle_en.md
@ -1,219 +0,0 @@
-# Contribute Code
-
-We sincerely appreciate your contributions. You can use fork and pull request
-workflow to merge your code.
-
-## Code Requirements
- Your code comments must be fully documented by
-  [Doxygen](http://www.stack.nl/~dimitri/doxygen/) style.
- Make sure the compiler option `WITH_STYLE_CHECK` is on and the compiler
-  passes the code style check.
- All code must have unit test.
- Pass all unit tests.
-
-The following tutorial guides you into submitting your contibution.
-
-## [Creating a Fork](https://help.github.com/articles/fork-a-repo/)
-
-Just head over to the GitHub page and click the "Fork" button.
-It's just that simple.
-
-## Clone
-
-Clone remote repository.
-
-```bash
-➜  git clone https://github.com/USERNAME/Paddle
-➜  cd Paddle
-```
-
-## Create a local branch
-
-Paddle is currently using [Git-flow branching model](http://nvie.com/posts/a-successful-git-branching-model/).
-
-All feature and bug fix development work should be done on a new branch, generally create new branch from `develop` branch .
-
-```bash
-➜  git checkout -b my-cool-stuff
-```
-
-Before the checkout, you need to keep the current branch directory clean, otherwise the untracked file will be brought to the new branch, which can be inspected by `git status`.
-
-## Using `pre-commit` hook
-
-Paddle developers use [pre-commit](http://pre-commit.com/) tool to manage git
-pre-commit hooks. It can help us format source codes (cpp, python), check some
-basic thing before commit (only one EOL for each file, do not add a huge file
-in git). `pre-commit` tests is a part of unit tests in Travis-CI now, every
-PR doesn't fit hook can not be merged into Paddle.
-
-To use [pre-commit](http://pre-commit.com/), you should install it by
-`pip install pre-commit`, and currently, Paddle uses `clang-format` to format
-c/cpp sources. Please make sure clang-format 3.8+ installed.
-
-Install and run it as follow:
-
-```bash
-➜  pip install pre-commit
-➜  pre-commit install
-```
-
-When you commit your code, the pre-commit hook will check the local code if there is
-anything not suitable to commit, and so on.
-
-## Start to develop
-
-In this tutorial, I delete a line in README.md and created a new file.
-
-We can use `git status` to inspect the changes of current directory, `git diff` to see difference.
-
-```bash
-➜  git status
-On branch test
-Changes not staged for commit:
-  (use "git add <file>..." to update what will be committed)
-  (use "git checkout -- <file>..." to discard changes in working directory)
-
-	modified:   README.md
-
-Untracked files:
-  (use "git add <file>..." to include in what will be committed)
-
-	test
-
-no changes added to commit (use "git add" and/or "git commit -a")
-```
-## Build and Test
-
-We package PaddlePaddle's compile environment into a Docker image, called the develop image named `paddle:dev`, it contains all compiling tools that PaddlePaddle needs. 
-
-If you want to build the develop image, just run:
-
-```bash
-➜  docker build -t paddle:dev .
-```
-
-Then we can use the develop image to build PaddlePaddle source. For example:
-
-```bash
-➜  docker run -v $(pwd):/paddle -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_TEST=ON" paddle:dev
-```
-
-The above command will compile PaddlePaddle and create a Dockerfile for building production image. All the generated files are in the build directory. "WITH_GPU" controls if the generated production image supports GPU. "WITH_AVX" controls if the generated production image supports AVX. "WITH_TEST" controls if the unit test will be generated.
-
-Then we can generate the production image by copying the compiled PaddlePaddle program into the image by
-
-```bash
-➜  docker build -t paddle:prod -f build/Dockerfile .
-```
-
-Run unit test finally:
-
-```bash
-➜  docker run -it -v $(pwd):/paddle paddle:dev bash -c "cd /paddle/build && ctest"
-```
-
-For more details, you can read [this doc](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/getstarted/build_and_install/docker_install_en.rst).
-
-## Commit
-
-Next we cancel the changes to the README.md file and then commit our changes by following command lines:
-
-```bash
-➜  git checkout -- README.md
-➜  git status
-On branch test
-Untracked files:
-  (use "git add <file>..." to include in what will be committed)
-
-	test
-
-nothing added to commit but untracked files present (use "git add" to track)
-➜  git add test
-```
-
-We should write a description of each commit by `git commit` to allow others to know
-the changes in these files.
-
-```bash
-➜  git commit
-CRLF end-lines remover...............................(no files to check)Skipped
-yapf.................................................(no files to check)Skipped
-Check for added large files..............................................Passed
-Check for merge conflicts................................................Passed
-Check for broken symlinks................................................Passed
-Detect Private Key...................................(no files to check)Skipped
-Fix End of Files.....................................(no files to check)Skipped
-clang-formater.......................................(no files to check)Skipped
-[my-cool-stuff c703c041] add test file
- 1 file changed, 0 insertions(+), 0 deletions(-)
- create mode 100644 233
-```
-
-## Keeping Fork Up to Date
-
-Before pull your request, you should sync your code from the latest PaddlePaddle.
-To do this, you'll need to add a remote at first:
-
-```bash
-➜  git remote add upstream https://github.com/PaddlePaddle/Paddle
-➜  git remote
-origin
-upstream
-```
-
-Update your fork with the latest upstream changes:
-
-```bash
-➜  git fetch upstream
-➜  git pull upstream develop
-```
-
-Now, your local master branch is up-to-date with everything modified upstream.
-
-## Push to GitHub
-
-```bash
-# push to your repository in Github
-➜  git push origin my-cool-stuff
-```
-
-## Create an issue and a Pull Request
-
-Create an Issue to describe the problem and record its number.
-
-Go to the page for your fork on GitHub, select your development branch,
-and click the `New pull request`.
-
-<img width="295" alt="screen shot 2017-04-26 at 9 09 28 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436054/a6d98c66-2ac4-11e7-9cb1-18dd13150230.png">
-
-Then select the target branch:
-
-<img width="750" alt="screen shot 2017-04-26 at 9 11 52 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436139/f83b1e6c-2ac4-11e7-8c0e-add499023c46.png">
-
-We can add `resolve #Issue number` in PR description to close the issue automatically after the PR is merge. More details in <https://help.github.com/articles/closing-issues-via-commit-messages/>.
-
-Then wait for review, if there need to modify, refer to the above steps to update the corresponding origin branch.
-
-## Delete origin branch
-
-After the PR is merge into the main repository, we can delete the remote branch on the PR page.
-
-<img width="775" alt="screen shot 2017-04-26 at 9 18 24 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436457/e4cdd472-2ac5-11e7-9272-badc76c4a23e.png">
-
-Or just run:
-
-```bash
-➜  git push origin :my-cool-stuff
-```
-
-## Delete local branch
-
-Finally, we delete local branch:
-
-```bash
-➜  git checkout develop 
-
-# delete my-cool-stuff branch
-➜  git branch -D my-cool-stuff
-```
--- a/doc/howto/dev/contribute_to_paddle_en.md
+++ b/doc/howto/dev/contribute_to_paddle_en.md
@ -0,0 +1 @@
+../../../CONTRIBUTING.md
--- a/doc/howto/index_cn.rst
+++ b/doc/howto/index_cn.rst
@ -21,7 +21,6 @@

  dev/build_cn.rst
  dev/write_docs_cn.rst
-  dev/contribute_to_paddle_cn.md

 模型配置
 --------
--- a/go/.gitignore
+++ b/go/.gitignore
@ -1,2 +1,3 @@
 vendor/
 .glide/
+proto/*.go
--- a/go/cmd/master/master.go
+++ b/go/cmd/master/master.go
@ -25,9 +25,8 @@ import (
 	"strings"
 	"time"

+	log "github.com/inconshreveable/log15"
 	"github.com/namsral/flag"
-	log "github.com/sirupsen/logrus"
-	"github.com/topicai/candy"

 	"github.com/PaddlePaddle/Paddle/go/master"
 	"github.com/PaddlePaddle/Paddle/go/utils/networkhelper"
@ -41,16 +40,20 @@ func main() {
 	taskTimeoutMax := flag.Int("task-timeout-max", 3, "max timtout count for each task before it being declared failed task.")
 	chunkPerTask := flag.Int("chunk-per-task", 10, "chunk per task.")
 	logLevel := flag.String("log-level", "info",
-		"log level, possible values: debug, info, warning, error, fatal, panic")
+		"log level, possible values: debug, info, warn, error, crit")
 	flag.Parse()

-	level, e := log.ParseLevel(*logLevel)
-	candy.Must(e)
+	lvl, err := log.LvlFromString(*logLevel)
+	if err != nil {
+		panic(err)
+	}

-	log.SetLevel(level)
+	log.Root().SetHandler(
+		log.LvlFilterHandler(lvl, log.CallerStackHandler("%+v", log.StderrHandler)),
+	)

 	if *endpoints == "" {
-		log.Warningln("-endpoints not set, fault tolerance not be enabled.")
+		log.Warn("-endpoints not set, fault tolerance not be enabled.")
 	}

 	var store master.Store
@ -58,23 +61,25 @@ func main() {
 		eps := strings.Split(*endpoints, ",")
 		ip, err := networkhelper.GetExternalIP()
 		if err != nil {
-			log.Fatal(err)
+			log.Crit("get external ip error", log.Ctx{"error": err})
+			panic(err)
 		}

 		addr := fmt.Sprintf("%s:%d", ip, *port)
 		store, err = master.NewEtcdClient(eps, addr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, *ttlSec)
 		if err != nil {
-			log.Fatal(err)
+			log.Crit("error creating etcd client.", log.Ctx{"error": err})
+			panic(err)
 		}
 	} else {
 		store = &master.InMemStore{}
 	}

 	shutdown := func() {
-		log.Infoln("shutting down gracefully")
+		log.Info("shutting down gracefully")
 		err := store.Shutdown()
 		if err != nil {
-			log.Errorln(err)
+			log.Error("shutdown error", log.Ctx{"error": err})
 		}
 	}

@ -86,24 +91,28 @@ func main() {

 	s, err := master.NewService(store, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax)
 	if err != nil {
-		log.Fatal(err)
+		log.Crit("error creating new service.", log.Ctx{"error": err})
+		panic(err)
 	}

 	err = rpc.Register(s)
 	if err != nil {
-		log.Fatal(err)
+		log.Crit("error registering to etcd.", log.Ctx{"error": err})
+		panic(err)
 	}

 	rpc.HandleHTTP()
 	l, err := net.Listen("tcp", ":"+strconv.Itoa(*port))
 	if err != nil {
-		log.Fatal(err)
+		log.Crit("error listing to port", log.Ctx{"error": err, "port": *port})
+		panic(err)
 	}

 	go func() {
 		err = http.Serve(l, nil)
 		if err != nil {
-			log.Fatal(err)
+			log.Crit("error serving HTTP", log.Ctx{"error": err})
+			panic(err)
 		}
 	}()

--- a/go/cmd/pserver/pserver.go
+++ b/go/cmd/pserver/pserver.go
@ -27,11 +27,11 @@ import (
 	"github.com/topicai/candy"

 	"github.com/PaddlePaddle/Paddle/go/pserver"
-	log "github.com/sirupsen/logrus"
+	log "github.com/inconshreveable/log15"
 )

 func main() {
-	port := flag.Int("port", 0, "port of the pserver")
+	port := flag.Int("port", 8001, "port of the pserver")
 	index := flag.Int("index", -1, "index of the pserver, set to -1 if use etcd for auto pserver index registry")
 	etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379",
 		"comma separated endpoint string for pserver to connect to etcd")
@ -41,13 +41,17 @@ func main() {
 	checkpointPath := flag.String("checkpoint-path", "/checkpoints/", "save checkpoint path")
 	checkpointInterval := flag.Duration("checkpoint-interval", 600*time.Second, "save checkpoint per interval seconds")
 	logLevel := flag.String("log-level", "info",
-		"log level, possible values: debug, info, warning, error, fatal, panic")
+		"log level, possible values: debug, info, warn, error, crit")
 	flag.Parse()

-	level, err := log.ParseLevel(*logLevel)
-	candy.Must(err)
+	lvl, err := log.LvlFromString(*logLevel)
+	if err != nil {
+		panic(err)
+	}

-	log.SetLevel(level)
+	log.Root().SetHandler(
+		log.LvlFilterHandler(lvl, log.CallerStackHandler("%+v", log.StderrHandler)),
+	)

 	var idx int

@ -63,7 +67,7 @@ func main() {
 		cp, err = pserver.LoadCheckpoint(e, idx)
 		if err != nil {
 			if err == pserver.ErrCheckpointNotFound {
-				log.Infof("Could not find the pserver checkpoint.")
+				log.Info("load checkpoint error", "error", err)
 			} else {
 				panic(err)
 			}
@ -71,10 +75,10 @@ func main() {
 	}

 	shutdown := func() {
-		log.Infoln("shutting down gracefully")
+		log.Info("shutting down gracefully")
 		sErr := e.Shutdown()
 		if sErr != nil {
-			log.Errorln(sErr)
+			log.Error("error shutting down", log.Ctx{"error": sErr})
 		}
 	}

@ -95,7 +99,7 @@ func main() {
 	candy.Must(err)

 	go func() {
-		log.Infof("start pserver at port %d", *port)
+		log.Info("serving pserver", log.Ctx{"port": *port})
 		err = http.Serve(l, nil)
 		candy.Must(err)
 	}()
--- a/go/glide.lock
+++ b/go/glide.lock
@ -1,5 +1,5 @@
-hash: 328e7b9b7306b45e7b9879139a9f86698115981f6283032e1312093a6a6ddb04
-updated: 2017-10-16T08:00:23.484693528Z
+hash: 107c058cf5c9163a75d40eef2273a793c36112683c25d72aa8288827fdde3a19
+updated: 2017-10-30T03:46:19.137696069Z
 imports:
 - name: github.com/alecthomas/gometalinter
  version: bae2f1293d092fd8167939d5108d1b025eaef9de
@ -99,6 +99,8 @@ imports:
  version: d2709f9f1f31ebcda9651b03077758c1f3a0018c
 - name: github.com/ghodss/yaml
  version: 0ca9ea5df5451ffdf184b4428c902747c2c11cd7
+- name: github.com/go-stack/stack
+  version: 817915b46b97fd7bb80e8ab6b69f01a53ac3eebf
 - name: github.com/gogo/protobuf
  version: 909568be09de550ed094403c2bf8a261b5bb730a
  subpackages:
@ -120,8 +122,14 @@ imports:
  - runtime
  - runtime/internal
  - utilities
+- name: github.com/inconshreveable/log15
+  version: 0decfc6c20d9ca0ad143b0e89dcaa20f810b4fb3
 - name: github.com/jonboulle/clockwork
  version: 2eee05ed794112d45db504eb05aa693efd2b8b09
+- name: github.com/mattn/go-colorable
+  version: 5411d3eea5978e6cdc258b30de592b60df6aba96
+- name: github.com/mattn/go-isatty
+  version: 57fdcb988a5c543893cc61bce354a6e24ab70022
 - name: github.com/matttproud/golang_protobuf_extensions
  version: c12348ce28de40eed0136aa2b644d0ee0650e56c
  subpackages:
@ -179,11 +187,12 @@ imports:
  - lex/httplex
  - trace
 - name: golang.org/x/sys
-  version: 0f826bdd13b500be0f1d4004938ad978fcc6031e
+  version: e48874b42435b4347fc52bdee0424a52abc974d7
  repo: https://github.com/golang/sys.git
  vcs: git
  subpackages:
  - unix
+  - windows
 - name: golang.org/x/text
  version: 836efe42bb4aa16aaa17b9c155d8813d336ed720
  repo: https://github.com/golang/text.git
@ -222,4 +231,3 @@ testImports:
  version: 05e8a0eda380579888eb53c394909df027f06991
  subpackages:
  - assert
-
--- a/go/glide.yaml
+++ b/go/glide.yaml
@ -26,3 +26,8 @@ import:
  version: v1.1.0
 - package: github.com/alecthomas/gometalinter
  version: v1.2.1
+- package: github.com/inconshreveable/log15
+  version: v2.13
+- package: github.com/go-stack/stack
+  version: v1.6.0
+- package: github.com/golang/protobuf
--- a/go/master/c/client.go
+++ b/go/master/c/client.go
@ -35,13 +35,19 @@ import (
 	"unsafe"

 	"github.com/PaddlePaddle/Paddle/go/master"
-	log "github.com/sirupsen/logrus"
+	log "github.com/inconshreveable/log15"
 )

 var mu sync.Mutex
 var handleMap = make(map[C.paddle_master_client]*master.Client)
 var curHandle C.paddle_master_client

+func init() {
+	log.Root().SetHandler(
+		log.LvlFilterHandler(log.LvlWarn, log.CallerStackHandler("%+v", log.StderrHandler)),
+	)
+}
+
 func add(c *master.Client) C.paddle_master_client {
 	mu.Lock()
 	defer mu.Unlock()
@ -117,7 +123,8 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int
 	}
 	err := c.SetDataset(paths)
 	if err != nil {
-		log.Errorln(err)
+		log.Error("error set dataset",
+			log.Ctx{"error": err, "paths": paths})
 		return C.PADDLE_MASTER_ERROR
 	}

@ -167,7 +174,7 @@ func paddle_request_save_model(client C.paddle_master_client, trainerID string,
 	c := get(client)
 	need, err := c.RequestSaveModel(trainerID, time.Duration(blockMS)*time.Millisecond)
 	if err != nil {
-		log.Errorln(err)
+		log.Error("error request save model", log.Ctx{"error": err})
 		return C.PADDLE_MASTER_ERROR
 	}

--- a/go/master/client.go
+++ b/go/master/client.go
@ -21,7 +21,7 @@ import (
 	"github.com/PaddlePaddle/Paddle/go/connection"
 	"github.com/PaddlePaddle/recordio"
 	"github.com/coreos/etcd/clientv3"
-	log "github.com/sirupsen/logrus"
+	log "github.com/inconshreveable/log15"
 )

 // Client is the client of the master server.
@ -75,7 +75,7 @@ func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error {
 		for {
 			err := f()
 			if err != nil {
-				log.Warningln(err)
+				log.Warn("create etcd client error", log.Ctx{"error": err})
 			} else {
 				break
 			}
@ -121,6 +121,7 @@ func (c *Client) StartGetRecords(passID int) {
 }

 func (c *Client) getRecords(passID int) {
+	i := 0
 	for {
 		t, err := c.getTask(passID)
 		if err != nil {
@ -130,18 +131,26 @@ func (c *Client) getRecords(passID int) {
 				c.ch <- record{nil, err}
 				break
 			}
-			if err.Error() == ErrPassAfter.Error() {
-				// wait util last pass finishes
-				time.Sleep(time.Second * 3)
-				continue
+
+			if i%60 == 0 {
+				log.Debug("getTask of passID error.",
+					log.Ctx{"error": err, "passID": passID})
+				i = 0
 			}
-			log.Errorf("getTask error: %s", err)
+
+			// if err.Error() == ErrPassAfter.Error()
+			//   wait util last pass finishes
+			// if other error such as network error
+			//   wait to reconnect or task time out
+			time.Sleep(time.Second * 3)
+			i += 3
+			continue
 		}

 		for _, chunk := range t.Chunks {
 			f, e := os.Open(chunk.Path)
 			if e != nil {
-				log.Errorln(e)
+				log.Error("error open chunk", log.Ctx{"error": e})
 				continue
 			}

@ -152,12 +161,15 @@ func (c *Client) getRecords(passID int) {

 			if s.Err() != nil {
 				c.ch <- record{nil, s.Err()}
-				log.Errorln(err, chunk.Path)
+				log.Error(
+					"error scan chunk",
+					log.Ctx{"error": err, "path": chunk.Path},
+				)
 			}

 			err = f.Close()
 			if err != nil {
-				log.Errorln(err)
+				log.Error("error close record file", log.Ctx{"error": err})
 			}
 		}

@ -166,7 +178,7 @@ func (c *Client) getRecords(passID int) {
 		// correct, but a reasonable approximation.
 		err = c.taskFinished(t.Meta.ID)
 		if err != nil {
-			log.Errorln(err)
+			log.Error("task finish callback error.", log.Ctx{"error": err})
 		}
 	}
 }
@ -179,12 +191,12 @@ func (c *Client) monitorMaster(addrCh <-chan string) {
 			if curMaster == "" {
 				err := c.conn.Close()
 				if err != nil {
-					log.Errorln(err)
+					log.Error("close old master addr error", log.Ctx{"error": err})
 				}
 			} else {
 				err := c.conn.Connect(curMaster)
 				if err != nil {
-					log.Errorln(err)
+					log.Error("connect to new master addr error", log.Ctx{"error": err})

 					// connect to addr failed, set
 					// to last known addr in order
--- a/go/master/client_internal_test.go
+++ b/go/master/client_internal_test.go
@ -25,8 +25,6 @@ import (
 	"testing"
 	"time"

-	log "github.com/sirupsen/logrus"
-
 	"github.com/PaddlePaddle/Paddle/go/connection"
 	"github.com/PaddlePaddle/recordio"
 )
@ -36,10 +34,6 @@ const (
 	chunkPerTask = 10
 )

-func init() {
-	log.SetLevel(log.ErrorLevel)
-}
-
 func TestGetFinishTask(t *testing.T) {
 	const path = "/tmp/master_client_test_0"

--- a/go/master/client_test.go
+++ b/go/master/client_test.go
@ -117,6 +117,7 @@ func TestNextRecord(t *testing.T) {
 			if e != nil {
 				panic(e)
 			}
+
 			// test for n passes
 			for pass := 0; pass < 10; pass++ {
 				c.StartGetRecords(pass)
--- a/go/master/etcd_client.go
+++ b/go/master/etcd_client.go
@ -20,7 +20,7 @@ import (

 	"github.com/coreos/etcd/clientv3"
 	"github.com/coreos/etcd/clientv3/concurrency"
-	log "github.com/sirupsen/logrus"
+	log "github.com/inconshreveable/log15"
 )

 const (
@ -44,7 +44,7 @@ type EtcdClient struct {

 // NewEtcdClient creates a new EtcdClient.
 func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) {
-	log.Debugf("Connecting to etcd at %v", endpoints)
+	log.Debug("Connecting to etcd", log.Ctx{"endpoint": endpoints})
 	cli, err := clientv3.New(clientv3.Config{
 		Endpoints:   endpoints,
 		DialTimeout: dialTimeout,
@ -64,12 +64,12 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
 	// one master running, but split-brain problem may cause
 	// multiple master servers running), and the cluster management
 	// software will kill one of them.
-	log.Infof("Trying to acquire lock at %s.", lockPath)
+	log.Info("Trying to acquire lock.", log.Ctx{"path": lockPath})
 	err = lock.Lock(context.TODO())
 	if err != nil {
 		return nil, err
 	}
-	log.Infof("Successfully acquired lock at %s.", lockPath)
+	log.Info("Successfully acquired lock at %s.", log.Ctx{"path": lockPath})

 	put := clientv3.OpPut(addrPath, addr)
 	resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit()
@ -78,7 +78,8 @@ func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePat
 	}

 	if !resp.Succeeded {
-		log.Fatal("No longer owns the master lock. Exiting.")
+		log.Crit("No longer owns the master lock. Exiting.")
+		panic("No longer owns the master lock. Exiting.")
 	}

 	e := &EtcdClient{
@ -102,7 +103,7 @@ func (e *EtcdClient) Save(state []byte) error {
 	}

 	if !resp.Succeeded {
-		log.Errorln("No longer owns the lock, trying to lock again")
+		log.Error("No longer owns the lock, trying to lock again")
 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 		err := e.lock.Lock(ctx)
 		cancel()
@ -116,9 +117,10 @@ func (e *EtcdClient) Save(state []byte) error {
 			// to kill current master server. The current
 			// state is not saved, but the trainer's RPC
 			// call will fail, so the trainer will retry.
-			log.Fatalf("Could not acquire the lock at %s: %v. Exiting.", e.lockPath, err)
+			log.Crit("Could not acquire the lock at %s: %v. Exiting.", log.Ctx{"path": e.lockPath, "error": err})
+			panic("Could not acquire the lock at %s: %v. Exiting.")
 		}
-		log.Infof("Successfully acquired lock at %s.", e.lockPath)
+		log.Info("Successfully acquired lock at %s.", e.lockPath)
 		return e.Save(state)
 	}

@ -136,7 +138,7 @@ func (e *EtcdClient) Load() ([]byte, error) {
 	}

 	if !resp.Succeeded {
-		log.Errorln("No longer owns the lock, trying to lock and load again.")
+		log.Error("No longer owns the lock, trying to lock and load again.")
 		err = e.lock.Lock(context.Background())
 		if err != nil {
 			return nil, err
@ -163,7 +165,7 @@ func (e *EtcdClient) Shutdown() error {
 		if err == nil {
 			err = newErr
 		} else {
-			log.Errorln(newErr)
+			log.Error("shutdown error", log.Ctx{"error": newErr})
 		}
 	}

@ -192,7 +194,7 @@ func watchKey(c *clientv3.Client, key string, valChan chan<- string) {
 	for wresp := range rch {
 		for _, ev := range wresp.Events {
 			// if received event is DELETE, the value will be an empty string
-			log.Infof("received event %s, %q : %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
+			log.Info("received event.", log.Ctx{"type": ev.Type, "key": ev.Kv.Key, "value": ev.Kv.Value})
 			valChan <- string(ev.Kv.Value)
 		}
 	}
--- a/Show More
+++ b/Show More