deal with conflit

8 years ago · 0dc4b29879
parent 931a23f1d2 2711bd92e2
commit 0dc4b29879
717 changed files with 33151 additions and 8697 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -31,6 +31,3 @@
    -   id: go-fmt
        types:
        - go
-    -   id: gometalinter
-        types:
-        - go
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -86,6 +86,14 @@ if(ANDROID OR IOS)
        "Disable MKLDNN when cross-compiling for Android and iOS" FORCE)
    set(WITH_MKLML OFF CACHE STRING
        "Disable MKLML package when cross-compiling for Android and iOS" FORCE)
+
+    # Compile PaddlePaddle mobile inference library
+    if (NOT WITH_C_API)
+        set(WITH_C_API ON CACHE STRING
+            "Always compile the C_API when cross-compiling for Android and iOS" FORCE)
+    endif()
+    set(MOBILE_INFERENCE ON)
+    add_definitions(-DPADDLE_MOBILE_INFERENCE)
 endif()

 set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
@ -97,6 +105,12 @@ if (WITH_C_API AND WITH_PYTHON)
    "different Python interpreter from compiling.")
 endif()

+if(MOBILE_INFERENCE)
+    set(THIRD_PARTY_BUILD_TYPE MinSizeRel)
+else()
+    set(THIRD_PARTY_BUILD_TYPE Release)
+endif()
+
 ########################################################################################

 include(external/mklml)     # download mklml package
@ -113,6 +127,7 @@ include(external/warpctc)   # download, build, install warpctc
 include(external/any)       # download libn::any
 include(external/eigen)     # download eigen3
 include(external/pybind11)    # download pybind11
+include(external/nccl)

 include(cudnn)              # set cudnn libraries, must before configure
 include(configure)          # add paddle env configuration
@ -145,7 +160,7 @@ set(EXTERNAL_LIBS
 if(WITH_GPU)
    list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
    if(NOT WITH_DSO)
-        list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+        list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
    endif(NOT WITH_DSO)
 endif(WITH_GPU)

@ -160,9 +175,11 @@ endif(USE_NNPACK)

 add_subdirectory(proto)

-# "add_subdirectory(go)" should be placed after the following loine,
-# because it depends on paddle/optimizer.
-add_subdirectory(paddle/optimizer)
+if(NOT MOBILE_INFERENCE)
+    # "add_subdirectory(go)" should be placed after the following loine,
+    # because it depends on paddle/optimizer.
+    add_subdirectory(paddle/optimizer)
+endif()

 # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be
 # placed after this block, because they depends on it.
--- a/2
+++ b/2
@ -22,7 +22,7 @@ COPY ./paddle/scripts/docker/root/ /root/

 RUN apt-get update && \
    apt-get install -y \
-    git python-pip python-dev openssh-server bison  \
+    git python-pip python-dev openssh-server bison libnccl-dev \
    wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
    curl sed grep graphviz libjpeg-dev zlib1g-dev  \
    python-matplotlib gcc-4.8 g++-4.8 \
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@ -24,6 +24,10 @@ if(WITH_DOUBLE)
    add_definitions(-DPADDLE_TYPE_DOUBLE)
 endif(WITH_DOUBLE)

+if(WITH_TESTING)
+    add_definitions(-DPADDLE_WITH_TESTING)
+endif(WITH_TESTING)
+
 if(NOT WITH_TIMER)
    add_definitions(-DPADDLE_DISABLE_TIMER)
 endif(NOT WITH_TIMER)
@ -49,19 +53,20 @@ if(NOT WITH_GOLANG)
 endif(NOT WITH_GOLANG)

 if(NOT WITH_GPU)
-    add_definitions(-DPADDLE_ONLY_CPU)
    add_definitions(-DHPPL_STUB_FUNC)

    list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
 else()
+    add_definitions(-DPADDLE_WITH_CUDA)
+
    FIND_PACKAGE(CUDA REQUIRED)

    if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
-        message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
+        message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
    endif()

    if(NOT CUDNN_FOUND)
-        message(FATAL_ERROR "Paddle need cudnn to compile")
+        message(FATAL_ERROR "Paddle needs cudnn to compile")
    endif()

    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
--- a/cmake/external/eigen.cmake
+++ b/cmake/external/eigen.cmake
@ -8,7 +8,7 @@ ExternalProject_Add(
    extern_eigen3
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY  "https://github.com/RLovelett/eigen.git"
-    GIT_TAG         "master"
+    GIT_TAG         70661066beef694cadf6c304d0d07e0758825c10
    PREFIX          ${EIGEN_SOURCE_DIR}
    UPDATE_COMMAND  ""
    CONFIGURE_COMMAND ""
--- a/cmake/external/gflags.cmake
+++ b/cmake/external/gflags.cmake
@ -36,6 +36,7 @@ ExternalProject_Add(
    # change this back to the official Github repo once my PR is
    # merged.
    GIT_REPOSITORY  "https://github.com/wangkuiyi/gflags.git"
+    GIT_TAG         986964c07427ecb9cdb5bd73f73ebbd40e54dadb
    PREFIX          ${GFLAGS_SOURCES_DIR}
    UPDATE_COMMAND  ""
    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@ -45,11 +46,11 @@ ExternalProject_Add(
                    -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                    -DBUILD_TESTING=OFF
-                    -DCMAKE_BUILD_TYPE=Release
+                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                    ${EXTERNAL_OPTIONAL_ARGS}
    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-                     -DCMAKE_BUILD_TYPE:STRING=Release
+                     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
 )

 ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@ -31,6 +31,7 @@ ExternalProject_Add(
    ${EXTERNAL_PROJECT_LOG_ARGS}
    DEPENDS gflags
    GIT_REPOSITORY  "https://github.com/google/glog.git"
+    GIT_TAG         v0.3.5
    PREFIX          ${GLOG_SOURCES_DIR}
    UPDATE_COMMAND  ""
    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@ -43,12 +44,12 @@ ExternalProject_Add(
                    -DWITH_GFLAGS=ON
                    -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
                    -DBUILD_TESTING=OFF
-                    -DCMAKE_BUILD_TYPE=Release
+                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                    ${EXTERNAL_OPTIONAL_ARGS}
    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
                     -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-                     -DCMAKE_BUILD_TYPE:STRING=Release
+                     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
 )

 ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
--- a/cmake/external/gtest.cmake
+++ b/cmake/external/gtest.cmake
@ -56,11 +56,11 @@ IF(WITH_TESTING)
                        -DBUILD_GMOCK=ON
                        -Dgtest_disable_pthreads=ON
                        -Dgtest_force_shared_crt=ON
-                        -DCMAKE_BUILD_TYPE=Release
+                        -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                        ${EXTERNAL_OPTIONAL_ARGS}
        CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GTEST_INSTALL_DIR}
                         -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-                         -DCMAKE_BUILD_TYPE:STRING=Release
+                         -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
    )

    ADD_LIBRARY(gtest STATIC IMPORTED GLOBAL)
--- a/cmake/external/nccl.cmake
+++ b/cmake/external/nccl.cmake
@ -0,0 +1,49 @@
+include(ExternalProject)
+
+set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
+
+include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
+
+if(WITH_DSO)
+  # If we use DSO, we do not build nccl, just download the dependencies
+  set(NCCL_BUILD_COMMAND "")
+  set(NCCL_INSTALL_COMMAND "")
+  set(NCCL_INSTALL_DIR "")
+else()
+  # otherwise, we build nccl and link it.
+  set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
+  # Note: cuda 8.0 is needed to make nccl
+  # When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root
+  set(NCCL_BUILD_COMMAND "make -j 8")
+  set(NCCL_INSTALL_COMMAND  "make install PREFIX=${NCCL_INSTALL_DIR}")
+endif()
+
+ExternalProject_Add(
+    extern_nccl
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    GIT_REPOSITORY  "https://github.com/NVIDIA/nccl.git"
+    GIT_TAG         "v1.3.4-1"
+    PREFIX          "${NCCL_SOURCE_DIR}"
+    UPDATE_COMMAND  ""
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND     "${NCCL_BUILD_COMMAND}"
+    INSTALL_COMMAND   "${NCCL_INSTALL_COMMAND}"
+    INSTALL_DIR       "${NCCL_INSTALL_DIR}"
+    TEST_COMMAND      ""
+)
+
+if(WITH_DSO)
+  if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
+    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
+    file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
+    add_library(nccl STATIC ${dummyfile})
+  else()
+    add_library(nccl INTERFACE)
+  endif()
+else()
+  add_library(nccl STATIC IMPORTED GLOBAL)
+  set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
+               ${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
+endif()
+
+add_dependencies(nccl extern_nccl)
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@ -191,12 +191,12 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
            ${OPTIONAL_ARGS}
            -Dprotobuf_BUILD_TESTS=OFF
            -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-            -DCMAKE_BUILD_TYPE=Release
+            -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
            -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
            -DCMAKE_INSTALL_LIBDIR=lib
        CMAKE_CACHE_ARGS
            -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-            -DCMAKE_BUILD_TYPE:STRING=Release
+            -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
            -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
            -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
            ${OPTIONAL_CACHE_ARGS}
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@ -35,6 +35,7 @@ ExternalProject_Add(
    extern_warpctc
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY  "https://github.com/gangliao/warp-ctc.git"
+    GIT_TAG         b63a0644654a3e0ed624c85a1767bc8193aead09
    PREFIX          ${WARPCTC_SOURCES_DIR}
    UPDATE_COMMAND  ""
    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@ -48,9 +49,9 @@ ExternalProject_Add(
                    -DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
                    -DBUILD_SHARED=ON
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-                    -DCMAKE_BUILD_TYPE=Release
+                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                    ${EXTERNAL_OPTIONAL_ARGS}
-    CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=Release
+    CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                     -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
 )
--- a/cmake/external/zlib.cmake
+++ b/cmake/external/zlib.cmake
@ -42,11 +42,11 @@ ExternalProject_Add(
                    -DBUILD_SHARED_LIBS=OFF
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                    -DCMAKE_MACOSX_RPATH=ON
-                    -DCMAKE_BUILD_TYPE=Release
+                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                    ${EXTERNAL_OPTIONAL_ARGS}
    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${ZLIB_INSTALL_DIR}
                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-                     -DCMAKE_BUILD_TYPE:STRING=Release
+                     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
 )

 LIST(APPEND external_project_dependencies zlib)
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@ -389,13 +389,60 @@ function(go_test TARGET_NAME)
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
 endfunction(go_test)

+# Modification of standard 'protobuf_generate_cpp()' with protobuf-lite support
+# Usage:
+#   paddle_protobuf_generate_cpp(<proto_srcs> <proto_hdrs> <proto_files>)
+
+function(paddle_protobuf_generate_cpp SRCS HDRS)
+  if(NOT ARGN)
+    message(SEND_ERROR "Error: paddle_protobuf_generate_cpp() called without any proto files")
+    return()
+  endif()
+
+  set(${SRCS})
+  set(${HDRS})
+
+  if (MOBILE_INFERENCE)
+      set(EXTRA_FLAG "lite:")  
+  else()
+      set(EXTRA_FLAG "") 
+  endif()
+
+  foreach(FIL ${ARGN})
+    get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+    get_filename_component(FIL_WE ${FIL} NAME_WE)
+    
+    set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
+    set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
+    list(APPEND ${SRCS} "${_protobuf_protoc_src}")
+    list(APPEND ${HDRS} "${_protobuf_protoc_hdr}")
+    
+    add_custom_command(
+      OUTPUT "${_protobuf_protoc_src}"
+             "${_protobuf_protoc_hdr}"
+
+      COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
+      COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} 
+      -I${CMAKE_CURRENT_SOURCE_DIR}
+      --cpp_out "${EXTRA_FLAG}${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL}
+      DEPENDS ${ABS_FIL} protoc
+      COMMENT "Running C++ protocol buffer compiler on ${FIL}"
+      VERBATIM )
+  endforeach()
+
+  set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
+  set(${SRCS} ${${SRCS}} PARENT_SCOPE)
+  set(${HDRS} ${${HDRS}} PARENT_SCOPE)
+endfunction()
+
+
 function(proto_library TARGET_NAME)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  set(proto_srcs)
  set(proto_hdrs)
-  protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS})
+  paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS})
  cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf)
 endfunction()

--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@ -73,25 +73,43 @@ function(link_paddle_exe TARGET_NAME)
        generate_rdma_links()
    endif()

-    target_circle_link_libraries(${TARGET_NAME}
-        ARCHIVE_START
-        paddle_gserver
-        paddle_function
-        ARCHIVE_END
-        paddle_pserver
-        paddle_trainer_lib
-        paddle_network
-        paddle_math
-        paddle_utils
-        paddle_parameter
-        paddle_proto
-        paddle_cuda
-        paddle_optimizer
-        ${EXTERNAL_LIBS}
-        ${CMAKE_THREAD_LIBS_INIT}
-        ${CMAKE_DL_LIBS}
-        ${RDMA_LD_FLAGS}
-        ${RDMA_LIBS})
+    if(MOBILE_INFERENCE)
+        target_circle_link_libraries(${TARGET_NAME}
+            ARCHIVE_START
+            paddle_gserver
+            paddle_function
+            ARCHIVE_END
+            paddle_math
+            paddle_utils
+            paddle_parameter
+            paddle_proto
+            paddle_cuda
+            ${EXTERNAL_LIBS}
+            ${CMAKE_THREAD_LIBS_INIT}
+            ${CMAKE_DL_LIBS}
+            ${RDMA_LD_FLAGS}
+            ${RDMA_LIBS})
+    else()
+        target_circle_link_libraries(${TARGET_NAME}
+            ARCHIVE_START
+            paddle_gserver
+            paddle_function
+            ARCHIVE_END
+            paddle_pserver
+            paddle_trainer_lib
+            paddle_network
+            paddle_math
+            paddle_utils
+            paddle_parameter
+            paddle_proto
+            paddle_cuda
+            paddle_optimizer
+            ${EXTERNAL_LIBS}
+            ${CMAKE_THREAD_LIBS_INIT}
+            ${CMAKE_DL_LIBS}
+            ${RDMA_LD_FLAGS}
+            ${RDMA_LIBS})
+    endif()

    if(ANDROID)
        target_link_libraries(${TARGET_NAME} log)
--- a/doc/api/v1/index_cn.rst
+++ b/doc/api/v1/index_cn.rst
@ -21,7 +21,7 @@ Model Config API
    trainer_config_helpers/optimizers.rst
    trainer_config_helpers/data_sources.rst
    trainer_config_helpers/layers.rst
-    trainer_config_helpers/activations.rst 
+    trainer_config_helpers/activations.rst
    trainer_config_helpers/poolings.rst
    trainer_config_helpers/networks.rst
    trainer_config_helpers/evaluators.rst
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@ -345,6 +345,11 @@ clip
 ..  autoclass:: paddle.v2.layer.clip
    :noindex:

+resize
+------
+..  autoclass:: paddle.v2.layer.resize
+    :noindex:
+
 slope_intercept
 ---------------
 ..  autoclass:: paddle.v2.layer.slope_intercept
--- a/doc/api/v2/config/networks.rst
+++ b/doc/api/v2/config/networks.rst
@ -125,3 +125,8 @@ simple_attention
    :members: simple_attention
    :noindex:

+dot_product_attention
+---------------------
+..  automodule:: paddle.v2.networks
+    :members: dot_product_attention
+    :noindex:
--- a/doc/design/block.md
+++ b/doc/design/block.md
--- a/doc/design/cluster_train/src/trainer.graffle
+++ b/doc/design/cluster_train/src/trainer.graffle
--- a/doc/design/dcgan.png
+++ b/doc/design/dcgan.png
--- a/doc/design/executor.md
+++ b/doc/design/executor.md
@ -0,0 +1,23 @@
+# Executor Design Doc
+
+## Motivation
+
+We use executor to do the runtime evaluation of a `ProgramDesc`.
+
+## Overview
+
+An executor takes a `ProgramDesc`, a `block_id` and a `Scope`.  The `ProgramDesc` is a list of blocks and each block contains the protobuf definition of all the parameters and operators. The `block_id` specifies the entrance block. And the `Scope` is the container of all the variable instance, which is persistent throughout different runs.
+
+### What does executor do?
+
+It evaluates all the operators in the `block_id`th block of a `ProgramDesc`.
+
+### What does executor NOT do?
+
+It does not do runtime optimization, meaning intelligently parse the dependency of each op a choose which one to be run and in which order they should be run.
+
+It does not do graph partitioning, meaning dividing the `ProgramDesc` into several small pieces and executing them on different devices.
+
+## Implementation
+
+`Executor` evaluates a `ProgramDesc`. Essentially, it instantiates Variables and Operators, then run all the operators in sequence. [[code]](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/executor.cc)
--- a/doc/design/gan_api.md
+++ b/doc/design/gan_api.md
--- a/doc/design/graph_survey.md
+++ b/doc/design/graph_survey.md
@ -0,0 +1,232 @@
+## Survey on Graph
+
+Neural network framework often provides symbolic API for users to write network topology conveniently. This doc manily focus on symbolic API in most popular neural network frameworks, and try to find out how to parse symbolic configuration to a portable file, such as protobuf or json.
+
+### Mxnet
+
+The core concept of symbolic API is `Symbol`. Mxnet implements `Symbol` class in C++, and export to Python using C-API. Please refer to the comments in Mxnet:
+
+
+`Symbol` is help class used to represent the operator node in Graph.
+`Symbol` acts as an interface for building graphs from different components like Variable, Functor and Group. `Symbol` is also exported to python front-end (while Graph is not) to enable quick test and deployment. Conceptually, symbol is the final operation of a graph and thus including all the information required (the graph) to evaluate its output value.
+
+
+A simple network topology wrote by Symbol is as follows:
+
+```python
+def get_symbol(num_classes=10, **kwargs):
+    data = mx.symbol.Variable('data')
+    data = mx.symbol.Flatten(data=data)
+    fc1  = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
+    act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu")
+    fc2  = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64)
+    act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu")
+    fc3  = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes)
+    mlp  = mx.symbol.SoftmaxOutput(data = fc3, name = 'softmax')
+    return mlp
+```
+
+
+
+Varible here is actually a Symbol. Every basic Symbol will correspond to one Node, and every Node has its own NodeAttr. There is a op field in NodeAttr class, when a Symbol represents Variable(often input data), the op field is null.
+
+Symbol contains a data member, std::vector<NodeEntry> outputs, and NodeEntry cantains a poniter to Node. We can follow the Node pointer to get all the Graph.
+
+And Symbol can be saved to a Json file.
+
+Here is a detailed example:
+
+```
+>>> import mxnet as mx
+>>> data = mx.symbol.Variable('data')
+>>> print data.debug_str()
+Variable:data
+
+>>> data = mx.symbol.Flatten(data=data)
+>>> print data.debug_str()
+Symbol Outputs:
+	output[0]=flatten0(0)
+Variable:data
+--------------------
+Op:Flatten, Name=flatten0
+Inputs:
+	arg[0]=data(0) version=0
+
+>>> fc1  = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128)
+>>> print fc1.debug_str()
+Symbol Outputs:
+	output[0]=fc1(0)
+Variable:data
+--------------------
+Op:Flatten, Name=flatten0
+Inputs:
+	arg[0]=data(0) version=0
+Variable:fc1_weight
+Variable:fc1_bias
+--------------------
+Op:FullyConnected, Name=fc1
+Inputs:
+	arg[0]=flatten0(0)
+	arg[1]=fc1_weight(0) version=0
+	arg[2]=fc1_bias(0) version=0
+Attrs:
+	num_hidden=128
+
+```
+
+
+### TensorFlow
+
+
+The core concept of symbolic API is `Tensor`. Tensorflow defines `Tensor` in Python. Please refer to the comments in TensorFlow:
+
+A `Tensor` is a symbolic handle to one of the outputs of an `Operation`. It does not hold the values of that operation's output, but instead provides a means of computing those values in a TensorFlow [Session](https://www.tensorflow.org/api_docs/python/tf/Session).
+
+A simple example is as follows:
+
+```python
+  # Build a dataflow graph.
+  c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
+  d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
+  e = tf.matmul(c, d)
+
+  # Construct a `Session` to execute the graph.
+  sess = tf.Session()
+
+  # Execute the graph and store the value that `e` represents in `result`.
+  result = sess.run(e)
+```
+
+  
+The main method of `Tensor` is as follows: 
+ 
+ 
+```python
+@property
+def op(self):
+  """The `Operation` that produces this tensor as an output."""
+  return self._op
+
+@property
+def dtype(self):
+   """The `DType` of elements in this tensor."""
+  return self._dtype
+
+@property
+def graph(self):
+  """The `Graph` that contains this tensor."""
+  return self._op.graph
+
+@property
+def name(self):
+  """The string name of this tensor."""
+  if not self._op.name:
+    raise ValueError("Operation was not named: %s" % self._op)
+  return "%s:%d" % (self._op.name, self._value_index)
+
+@property
+def device(self):
+  """The name of the device on which this tensor will be produced, or None."""
+  return self._op.device
+```
+
+
+Tensor can be taken as target to run by session. Tensor contains all the information of Graph, and tracks data dependency.
+
+
+Here is a detailed example:
+
+
+```
+>>> import tensorflow as tf
+>>> c = tf.constant([[1.0, 2.0], [3.0, 4.0]])
+>>> print c.graph
+<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
+>>> d = tf.constant([[1.0, 1.0], [0.0, 1.0]])
+>>> print d.graph
+<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
+>>> e = tf.matmul(c, d)
+>>> print e.graph
+<tensorflow.python.framework.ops.Graph object at 0x10f256d50>
+```
+
+### Dynet
+
+
+The core concept of symbolic API is `Expression`, and Dynet defines `Expression` class in C++.
+
+
+A simple example is as follows:
+
+```cpp
+ComputationGraph cg;
+Expression W = parameter(cg, pW);
+
+Expression in = input(cg, xs[i]);
+Expression label = input(cg, ys[i]);
+Expression pred = W * in;
+Expression loss = square(pred - label);
+```
+
+The input data and parameter are also represented by Expression. Every basci Expression corresponds to a Node. And input data is also a Node. 
+
+Expression has a data member ComputationGraph, and ComputationGraph will be modified in users' configuring process. Expression can be a running target, beacuse Expression contains all dependency.
+
+
+Here is a detailed example:
+
+write topology in C++
+
+```
+ComputationGraph cg;
+Expression W = parameter(cg, pW);
+cg.print_graphviz();
+
+Expression pred = W * xs[i];
+cg.print_graphviz();
+
+Expression loss = square(pred - ys[i]);
+cg.print_graphviz();
+```
+
+compile and print
+
+```
+# first print
+digraph G {
+  rankdir=LR;
+  nodesep=.05;
+  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
+}
+# second print
+digraph G {
+  rankdir=LR;
+  nodesep=.05;
+  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
+  N1 [label="v1 = v0 * -0.98"];
+  N0 -> N1;
+}
+# third print
+digraph G {
+  rankdir=LR;
+  nodesep=.05;
+  N0 [label="v0 = parameters({1}) @ 0x7ffe4de00110"];
+  N1 [label="v1 = v0 * -0.98"];
+  N0 -> N1;
+  N2 [label="v2 = -1.88387 - v1"];
+  N1 -> N2;
+  N3 [label="v3 = -v2"];
+  N2 -> N3;
+  N4 [label="v4 = square(v3)"];
+  N3 -> N4;
+}
+```
+
+### Conclusion
+
+
+Actually, Symbol/Tensor/Expression in Mxnet/TensorFlow/Dynet are the same level concepts. We use a unified name Expression here, this level concept has following features:
+
+- Users wirte topoloy with symbolic API, and all return value is Expression, including input data and parameter.
+- Expression corresponds with a global Graph, and Expression can also be composed.
+- Expression tracks all dependency and can be taken as a run target
--- a/doc/design/if_else_op.md
+++ b/doc/design/if_else_op.md
@ -1,41 +1,51 @@
-IfOp should have only one branch. An IfOp operator takes a `cond` variable whose value must be a vector of N boolean elements. Its return value has N instances. If cond[i] == True, input instance input[i] will go through true_block() and generate output[i]; otherwise it will produce output from false_bloack().
+# The `IfElse` Operator

-```python
-import paddle as pd
+PaddlePaddle's `IfElse` operator differs from TensorFlow's:

-x = var()
-y = var()
-cond = var()
-default_value = var()
-b = pd.create_ifelseop(inputs=[x], output_num=1)
-with b.true_block():
-    x = b.inputs(0)
-    z = operator.add(x, y)
-    b.set_output(0, operator.softmax(z))
-
-with b.false_block():
-    x = b.inputs(0)
-    z = layer.fc(x)
-    b.set_output(0, operator.softmax(z))
-
-out = b(cond)
-```
+- the TensorFlow version takes a scalar boolean value as the condition so that the whole mini-batch goes to either the true or the false branch, whereas
+- the PaddlePaddle version takes a vector of boolean value as the condition, and instances corresponding to true values go to the true branch, those corresponding to false values go to the false branch.
+
+## Example
+
+The following PaddlePaddle program shows the usage of the IfElse operator:

-If only true_block is set in an IfElseOp, a special case is that we can have a default value for false as:
 ```python
 import paddle as pd

-x = var()
-y = var()
-cond = var()
-default_value = var()
-b = pd.create_ifelseop(inputs=[x], output_num=1, default_value)
-
-with b.true_block():
-    x = b.inputs(0)
-    z = operator.add(x, y)
-    b.set_output(0, operator.softmax(z))
+x = minibatch([10, 20, 30]) # shape=[None, 1]
+y = var(1) # shape=[1], value=1
+z = minibatch([10, 20, 30]) # shape=[None, 1]
+cond = larger_than(x, 15) # [false, true, true]
+
+ie = pd.ifelse()
+with ie.true_block():
+    d = pd.layer.add(x, y)
+    ie.output(d, pd.layer.softmax(d))
+with ie.false_block():
+    d = pd.layer.fc(z)
+    ie.output(d, d+1)
+o1, o2 = ie(cond)
+```

-out = b(cond)
+A challenge to implement the `IfElse` operator is to infer those variables to be split, or, say, to identify the variable of the mini-batch or those derived from the mini-batch.
+
+An equivalent C++ program is as follows:
+
+```c++
+namespace pd = paddle;
+
+int x = 10;
+int y = 1;
+int z = 10;
+bool cond = false;
+int o1, o2;
+if (cond) {
+  int d = x + y;
+  o1 = z;
+  o2 = pd::layer::softmax(z);
+} else {
+  int d = pd::layer::fc(z);
+  o1 = d;
+  o2 = d+1;
+}
 ```
-where default_value is a list of vars for `cond` == False.
--- a/doc/design/images/feed_forward.png
+++ b/doc/design/images/feed_forward.png
--- a/Show More
+++ b/Show More