Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into no_counter_on_pserver

fix_conll05_bug
typhoonzero 7 years ago
commit 136a591911

@@ -156,6 +156,7 @@ include(rdma) # set rdma libraries
include(flags) # set paddle compile flags
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(inference_lib) # add paddle fluid inference libraries
include_directories("${PADDLE_SOURCE_DIR}")

@@ -28,9 +28,3 @@ endif()
add_dependencies(eigen3 extern_eigen3)
LIST(APPEND external_project_dependencies eigen3)
IF(NOT WITH_C_API AND WITH_FLUID)
  INSTALL(FILES ${EIGEN_INCLUDE_DIR}/Eigen/Core DESTINATION third_party/eigen3/Eigen)
  INSTALL(DIRECTORY ${EIGEN_INCLUDE_DIR}/Eigen/src DESTINATION third_party/eigen3/Eigen)
  INSTALL(DIRECTORY ${EIGEN_INCLUDE_DIR}/unsupported/Eigen DESTINATION third_party/eigen3/unsupported)
ENDIF()

@@ -52,7 +52,7 @@ ADD_DEPENDENCIES(gflags extern_gflags)
LIST(APPEND external_project_dependencies gflags)
IF(WITH_C_API)
  INSTALL(DIRECTORY ${GFLAGS_INCLUDE_DIR} DESTINATION third_party/gflags)
  IF(ANDROID)
    INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib/${ANDROID_ABI})

@@ -68,7 +68,7 @@ LINK_LIBRARIES(glog gflags)
LIST(APPEND external_project_dependencies glog)
IF(WITH_C_API)
  INSTALL(DIRECTORY ${GLOG_INCLUDE_DIR} DESTINATION third_party/glog)
  IF(ANDROID)
    INSTALL(FILES ${GLOG_LIBRARIES} DESTINATION third_party/glog/lib/${ANDROID_ABI})

@@ -250,7 +250,7 @@ IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY}
    CACHE FILEPATH "protoc library." FORCE)
IF(WITH_C_API)
  INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf)
  IF(ANDROID)
    INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI})

@@ -52,6 +52,7 @@ ExternalProject_Add(
    -DWITH_TORCH=OFF
    -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
    -DBUILD_SHARED=ON
    -DBUILD_TESTS=OFF
    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
    ${EXTERNAL_OPTIONAL_ARGS}

@@ -179,20 +179,24 @@ function(cc_library TARGET_NAME)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  if(cc_library_SRCS)
    if(cc_library_SHARED OR cc_library_shared) # build *.so
      add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
    else()
      add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
    endif()
    if(cc_library_DEPS)
      # Don't need to link libwarpctc.so
      if("${cc_library_DEPS};" MATCHES "warpctc;")
        list(REMOVE_ITEM cc_library_DEPS warpctc)
        add_dependencies(${TARGET_NAME} warpctc)
      endif()
      if("${cc_library_DEPS}" MATCHES "ARCHIVE_START")
        list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
      endif()
      add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
      target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
    endif()
    # cpplint code style

@@ -0,0 +1,90 @@
# make package for paddle fluid shared and static library
function(copy TARGET)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DSTS DEPS)
  cmake_parse_arguments(copy_lib "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  list(LENGTH copy_lib_SRCS copy_lib_SRCS_len)
  list(LENGTH copy_lib_DSTS copy_lib_DSTS_len)
  if(NOT ${copy_lib_SRCS_len} EQUAL ${copy_lib_DSTS_len})
    message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers")
  endif()
  math(EXPR len "${copy_lib_SRCS_len} - 1")

  add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS})
  foreach(index RANGE ${len})
    list(GET copy_lib_SRCS ${index} src)
    list(GET copy_lib_DSTS ${index} dst)
    add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND mkdir -p "${dst}")
    if(IS_DIRECTORY ${src})
      add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND cp -r "${src}" "${dst}")
    else()
      add_custom_command(TARGET ${TARGET} PRE_BUILD COMMAND cp "${src}" "${dst}")
    endif()
  endforeach()
endfunction()
# third party
set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/eigen3")
copy(eigen3_lib
  SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen
  DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported
)

set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/gflags")
copy(gflags_lib
  SRCS ${GFLAGS_INCLUDE_DIR} ${GFLAGS_LIBRARIES}
  DSTS ${dst_dir} ${dst_dir}/lib
)

set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/glog")
copy(glog_lib
  SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES}
  DSTS ${dst_dir} ${dst_dir}/lib
)

IF(NOT PROTOBUF_FOUND)
  set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/protobuf")
  copy(protobuf_lib
    SRCS ${PROTOBUF_INCLUDE_DIR} ${PROTOBUF_LITE_LIBRARY}
    DSTS ${dst_dir} ${dst_dir}/lib
  )
ENDIF(NOT PROTOBUF_FOUND)
# paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle")

set(module "framework")
copy(framework_lib DEPS framework_py_proto
  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/framework/framework.pb.h
  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
)

set(module "memory")
copy(memory_lib
  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h
  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail
)

set(module "inference")
copy(inference_lib DEPS paddle_fluid_shared
  SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/inference/libpaddle_fluid.so
  DSTS ${dst_dir}/${module} ${dst_dir}/${module}
)

set(module "platform")
copy(platform_lib
  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h
  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details
)

set(module "string")
copy(string_lib
  SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/tinyformat/*.h
  DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat
)

add_custom_target(inference_lib_dist DEPENDS
  inference_lib framework_lib memory_lib platform_lib string_lib
  gflags_lib glog_lib protobuf_lib eigen3_lib)

@@ -13,7 +13,7 @@ PaddlePaddle provides pip and Docker installation methods
pip_install_cn.rst
docker_install_cn.rst
build_cn.md

Build Process
++++++++++++

@@ -13,7 +13,7 @@ You can choose either pip or Docker to complete your install:
pip_install_en.rst
docker_install_en.rst
build_en.md
Build from Source


@@ -1,23 +1,23 @@
## Auto Gradient Check Design

## Background
- Generally, it is easy to check whether the forward computation of an Operator is correct or not. However, backpropagation is a notoriously difficult algorithm to debug and get right because of the following challenges:
  1. The formula for backpropagation should be consistent with the forward computation.
  2. The implementation of the above should be correct in C++.
  3. It is difficult to prepare unbiased test data.
- Auto gradient checking gets a numerical gradient using the forward Operator and uses it as a reference for the backward Operator's result. It has several advantages:
  1. The numerical gradient checker only needs the forward operator.
  2. The user only needs to prepare the input data for the forward Operator and need not worry about the backward Operator.

## Mathematical Theory
The following documents from Stanford have a detailed explanation of how to compute the numerical gradient and why it is useful.

- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)
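
In short, both notes arrive at the central difference approximation that the implementation below uses: perturb one input element by a small delta in each direction and divide the change in the output by twice that delta,

$$\frac{\partial f}{\partial x_i} \approx \frac{f(x + \delta e_i) - f(x - \delta e_i)}{2\delta},$$

where $e_i$ is the unit vector selecting element $i$. Its truncation error is $O(\delta^2)$, which is why it is preferred over the one-sided difference, whose error is only $O(\delta)$.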
## Numerical Gradient Implementation
### Python Interface
```python
def get_numerical_gradient(op,
@@ -27,73 +27,76 @@ def get_numerical_gradient(op,
                           delta=0.005,
                           local_scope=None):
    """
    Get Numerical Gradient for the input of an operator.

    :param op: C++ operator instance, could be a network.
    :param input_values: The input variables. Should be a dictionary whose keys
        are the variable names, and whose values are numpy arrays.
    :param output_name: The final output variable name.
    :param input_to_check: The input variable with respect to which the
        gradient has to be computed.
    :param delta: The perturbation value for the numerical gradient method. The
        smaller the delta, the more accurate the result. But if the delta is too
        small, it will suffer from the numerical stability problem.
    :param local_scope: The local scope used for get_numeric_gradient.
    :return: The gradient array in numpy format.
    """
```
### Explanation:
- Why do we need an `output_name`?
  - An Operator may have multiple Outputs, and one can compute an independent gradient from each Output. So the caller should specify the name of the output variable.
- Why do we need `input_to_check`?
  - One operator can have multiple inputs. A Gradient Op can calculate the gradients of all these inputs at the same time, but the numerical gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times, each with a different input, as the sketch below shows.
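
A hypothetical call site (the op, variable names, and shapes are illustrative stand-ins; this is a design-stage interface, not runnable as-is):

```python
import numpy

# One mul op with two inputs, so the numerical gradient is
# computed once per input.
input_values = {
    "X": numpy.random.random((2, 3)).astype("float32"),
    "W": numpy.random.random((3, 4)).astype("float32"),
}
dx = get_numerical_gradient(mul_op, input_values, "Out", input_to_check="X")
dw = get_numerical_gradient(mul_op, input_values, "Out", input_to_check="W")
```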
### Core Algorithm Implementation
```python
# we only compute the gradient of one element at a time.
# we use a for loop to compute the gradient of each element.
for i in xrange(tensor_size):
    # get one input element using the index i.
    original = tensor_to_check.get_float_element(i)

    # add delta to it, run the forward op and then
    # get the new value of the result tensor.
    x_pos = original + delta
    tensor_to_check.set_float_element(i, x_pos)
    y_pos = get_output()

    # subtract delta from this element, run the op again
    # and get the new value of the result tensor.
    x_neg = original - delta
    tensor_to_check.set_float_element(i, x_neg)
    y_neg = get_output()

    # restore the old value
    tensor_to_check.set_float_element(i, original)

    # compute the gradient of this element and store
    # it into a numpy array.
    gradient_flat[i] = (y_pos - y_neg) / delta / 2

# reshape the gradient result to the shape of the source tensor.
return gradient_flat.reshape(tensor_to_check.get_dims())
```
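
The same algorithm can be demonstrated outside Paddle. Below is a minimal, self-contained NumPy sketch of central-difference gradient checking; it is not Paddle's harness, and the function under test is an arbitrary scalar-valued example:

```python
import numpy as np

def numerical_gradient(f, x, delta=0.005):
    """Central-difference gradient of a scalar-valued f at the array x."""
    grad = np.zeros_like(x)
    flat_x = x.reshape(-1)            # views, so writes go through to x
    flat_g = grad.reshape(-1)
    for i in range(flat_x.size):
        original = flat_x[i]
        flat_x[i] = original + delta
        y_pos = f(x)
        flat_x[i] = original - delta
        y_neg = f(x)
        flat_x[i] = original          # restore the old value
        flat_g[i] = (y_pos - y_neg) / delta / 2
    return grad

# d(sum(x * w))/dx == w, so the numerical gradient should match w.
w = np.random.rand(3, 4)
x = np.random.rand(3, 4)
assert np.allclose(numerical_gradient(lambda v: np.sum(v * w), x), w)
```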
## Auto Gradient Check Framework

Each Operator Kernel has three kinds of Gradient:
1. Numerical gradient
2. CPU kernel gradient
3. GPU kernel gradient (if supported by the device)

The numerical gradient only relies on the forward Operator, so we use the numerical gradient as the reference value. The gradient checking is performed in the following three steps:
1. Calculate the numerical gradient.
2. Calculate the CPU kernel gradient with the backward Operator and compare it with the numerical gradient.
3. Calculate the GPU kernel gradient with the backward Operator and compare it with the numerical gradient (if supported).
#### Python Interface
@@ -109,26 +112,27 @@ The numerical gradient only relies on forward Operator. So we use the numerical
"""
:param forward_op: used to create backward_op
:param input_vars: numpy value of input variable. The following
computation will use these variables.
:param inputs_to_check: the input variable with respect to which to compute the gradient.
computation will use these variables.
:param inputs_to_check: the input variable with respect to which the
gradient will be computed.
:param output_name: The final output variable name.
:param max_relative_error: The relative tolerance parameter.
:param no_grad_set: used when create backward ops
:param no_grad_set: used to create backward ops
:param only_cpu: only compute and check gradient on cpu kernel.
:return:
"""
```
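
A hypothetical call site for this interface; `check_grad`, `mul_op`, and the variable names are stand-ins, since the checker's actual name falls outside this excerpt:

```python
import numpy

input_vars = {
    "X": numpy.random.random((2, 3)).astype("float32"),
    "W": numpy.random.random((3, 4)).astype("float32"),
}
check_grad(mul_op,                     # forward_op
           input_vars,
           inputs_to_check=["X", "W"],
           output_name="Out",
           max_relative_error=0.005)
```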
### How to check if two numpy arrays are close enough?
If `abs_numerical_grad` is nearly zero, then use the absolute error for numerical_grad.
```python
numerical_grad = ...
operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())

abs_numerical_grad = numpy.abs(numerical_grad)
# if abs_numerical_grad is nearly zero, then use the absolute error for
# numerical_grad, instead of the relative error.
abs_numerical_grad[abs_numerical_grad < 1e-3] = 1

diff_mat = numpy.abs(numerical_grad - operator_grad) / abs_numerical_grad
```
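
Assembled into a self-contained sketch (the arrays and the tolerance are synthetic stand-ins for the values fetched from the real scope):

```python
import numpy

max_relative_error = 0.005
numerical_grad = numpy.array([0.0, 0.5, 2.0])
operator_grad = numpy.array([1e-4, 0.501, 1.995])

abs_numerical_grad = numpy.abs(numerical_grad)
# fall back to absolute error where the reference is nearly zero
abs_numerical_grad[abs_numerical_grad < 1e-3] = 1
diff_mat = numpy.abs(numerical_grad - operator_grad) / abs_numerical_grad

max_diff = numpy.max(diff_mat)
assert max_diff <= max_relative_error, "gradient check failed"
```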
@@ -137,10 +141,10 @@ max_diff = numpy.max(diff_mat)
#### Notes
The input data for the auto gradient checker should be reasonable, to avoid the numerical stability problem.

#### References:
- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)

@@ -0,0 +1,79 @@
# C++ Data Feeding
When training with the Paddle V2 API, data feeding depends entirely on Python code. To get rid of the Python environment and achieve the goal of "wrapping the whole training by a while-loop op" in Paddle Fluid, a C++ data feeding mechanism is required.

In this document we present the fundamental design of the C++ data feeding process, which covers data reading, shuffling, and batching.
## Reader
A new concept named 'Reader' is introduced. `Reader` is a hierarchy of classes whose instances can be held by a `Variable` and are used to read or process file data.
### `ReaderBase`
`ReaderBase` is the abstract base class of all readers. It defines the interface that all readers share.
```cpp
class ReaderBase {
 public:
  explicit ReaderBase(const std::vector<DDim>& shapes) : shapes_(shapes) {
    PADDLE_ENFORCE(!shapes_.empty());
  }
  // Read the next batch of data. (A 'batch' can contain just one instance.)
  virtual void ReadNext(std::vector<LoDTensor>* out) = 0;
  // Tell whether the next batch exists.
  virtual bool HasNext() const = 0;
  // Reinitialize the reader and read the file from the beginning.
  virtual void ReInit() = 0;

  // Get the shape of the data at a certain index.
  DDim shape(size_t idx) const;
  // Get the shapes of all the data this reader yields.
  std::vector<DDim> shapes() const { return shapes_; }
  // Set the shapes of the data this reader yields.
  void set_shapes(const std::vector<DDim>& shapes) { shapes_ = shapes; }

  virtual ~ReaderBase() {}

 protected:
  std::vector<DDim> shapes_;
};
```
### `FileReader` and `DecoratedReader`
These two classes are derived from `ReaderBase` and are themselves further derived by specific readers. That is to say, in our design there are two kinds of readers: file readers and decorated readers. A file reader reads from a file of some specific format and yields only one instance of data at a time, e.g. a RecordIO reader or a JPEG reader. A decorated reader takes another reader (either a file reader or another decorated reader) as its 'underlying reader'. It gets data from its underlying reader, does some processing on it (shuffling or batching), and then yields the processed data. The output data of a decorated reader can be a single instance or a batch. `ShuffleReader` and `BatchReader` are both decorated readers.

All the readers share exactly the same interface defined in `ReaderBase`, so they can be decorated more than once: we can **shuffle** a reader's outputs and then **batch** the shuffled outputs, as the toy model below illustrates. The interface consistency also allows related ops to use readers without knowing their concrete types.
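
Here is a toy Python model of that composition (hypothetical and for illustration only; the real readers are the C++ classes described in this document):

```python
class CountingReader:                    # stands in for a file reader
    def __init__(self, n):
        self.n, self.pos = n, 0
    def read_next(self):                 # yields one instance at a time
        self.pos += 1
        return self.pos - 1
    def has_next(self):
        return self.pos < self.n
    def re_init(self):
        self.pos = 0

class BatchReader:                       # a decorated reader
    def __init__(self, underlying, batch_size):
        self.underlying, self.batch_size = underlying, batch_size
    def read_next(self):                 # yields a whole batch
        batch = []
        while self.underlying.has_next() and len(batch) < self.batch_size:
            batch.append(self.underlying.read_next())
        return batch
    def has_next(self):
        return self.underlying.has_next()
    def re_init(self):
        self.underlying.re_init()

reader = BatchReader(CountingReader(10), batch_size=4)
while reader.has_next():
    print(reader.read_next())            # [0, 1, 2, 3], [4, 5, 6, 7], [8, 9]
```

Because the decorated reader exposes the same interface as its underlying reader, another decorator (for example a shuffle reader) could wrap either of them without any code change.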
### `ReaderHolder`
Different readers belong to different class types. This leads to a problem: how can we drop them into `Variable`s and fetch them out with a unified method? For example, if a Variable holds a `BatchReader`, we cannot get it with the following code:
```cpp
var->Get<ReaderBase>("batch_reader");
```
we have to write:
```cpp
var->Get<BatchReader>("batch_reader");
```
This requires that, each time we get a reader from a variable, we know the reader's exact type, which is nearly impossible.

To solve this problem, we introduce `ReaderHolder` as a wrapper. It acts as an empty decorator of `ReaderBase`, which erases the reader's type. With `ReaderHolder` we are able to fetch all types of readers with `var->Get<ReaderHolder>("...")` and treat the obtained object as a reader.
## Related Operators
To create and invoke readers, some new ops are introduced:
### `CreateReaderOp`
Each reader has its own creating op. A file reader's creating op has no input and yields the created file reader as its output. A decorated reader's creating op takes its underlying reader as input and yields the new decorated reader.
### `ReadOp`
A reader is only a Variable; it cannot trigger the reading process by itself. So we add a `ReadOp` to execute it. A `ReadOp` takes a reader Variable as its input. Each time it runs, it invokes the reader's `ReadNext()` function and gets a new batch of data (or just one instance of data, if we use a file reader directly). The output data of a reader is in the form of `std::vector<LoDTensor>`, so the `ReadOp` also needs to split the vector and move the LoDTensors to their respective output Variables. A toy model of this behavior is sketched below.
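
A hypothetical Python model of what a `ReadOp` does each time it runs (a plain dict stands in for the real `Scope`, and strings stand in for LoDTensors):

```python
def read_op_run(scope, reader_name, out_names):
    reader = scope[reader_name]            # fetched via ReaderHolder in C++
    batch = reader.read_next()             # one tensor per output column
    assert len(batch) == len(out_names)
    for name, tensor in zip(out_names, batch):
        scope[name] = tensor               # move each tensor to its output

class ToyReader:                           # yields (images, labels) pairs
    def read_next(self):
        return ["images_tensor", "labels_tensor"]

scope = {"reader": ToyReader()}
read_op_run(scope, "reader", ["images", "labels"])
print(scope["images"], scope["labels"])
```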

@@ -144,8 +144,9 @@ ch = fluid.make_channel(dtype=INT, buffer_size)
# Now write three elements to the channel
with fluid.while(steps=buffer_size):
    fluid.send(ch, step)
fluid.close_channel(ch)

with fluid.while(steps=buffer_size):
    fluid.print(fluid.recv(ch))
```

@@ -10,8 +10,7 @@ The following example shows the usage of `fluid.switch`.
a = fluid.Var(10)
b = fluid.Var(0)
with fluid.switch() as switch:
    with switch.case(fluid.less_equal(a, 10)):
        fluid.print("Case 1")
    with switch.case(fluid.larger(a, 0)):

