Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/shuffle_reader

7 years ago · a8c076e577
parent 225efa671f 1f757f5f70
commit a8c076e577
70 changed files with 753 additions and 498 deletions
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@ -244,11 +244,11 @@ function(cc_test TARGET_NAME)
    cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
    add_executable(${TARGET_NAME} ${cc_test_SRCS})
    # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
-    target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+    target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
    if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
      list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
    endif()
-    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
    add_test(NAME ${TARGET_NAME}
             COMMAND ${TARGET_NAME} ${cc_test_ARGS}
             WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
@ -311,8 +311,8 @@ function(nv_test TARGET_NAME)
    set(multiValueArgs SRCS DEPS)
    cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
    cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
-    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
-    add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+    target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
+    add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main paddle_memory gtest gflags glog)
    add_test(${TARGET_NAME} ${TARGET_NAME})
  endif()
 endfunction(nv_test)
--- a/doc/design/dist_refactor/distributed_architecture.md
+++ b/doc/design/dist_refactor/distributed_architecture.md
--- a/doc/design/dist_refactor/multi_cpu.md
+++ b/doc/design/dist_refactor/multi_cpu.md
--- a/doc/design/dist_refactor/parameter_server.md
+++ b/doc/design/dist_refactor/parameter_server.md
@ -59,6 +59,17 @@ After converting:
     queue. It will block until the queue has the required number of
     tensors.

+### Sparse Update
+
+For embedding layers, the gradient may have many rows containing only 0 when training.
+If the gradient uses a dense tensor to do parameter optimization,
+it could consume unnecessary memory, slow down the calculations, and waste
+bandwidth during distributed training.
+In Fluid, we introduce [SelectedRows](../selected_rows.md) to represent a list of rows containing
+non-zero gradient data. So when we do parameter optimization both locally and remotely,
+we only need to send those non-zero rows to the optimizer operators:
+
+<img src="src/sparse_update.png" width="700" />

 ### Benefits

@ -91,6 +102,6 @@ After converting:
  `min_count` attribute), does our current design support it? (similar
  question for the *Add* OP)

+### References

-### References:
 [1] [TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf)
--- a/doc/design/dist_refactor/src/compiler.graffle
+++ b/doc/design/dist_refactor/src/compiler.graffle
--- a/doc/design/dist_refactor/src/compiler.png
+++ b/doc/design/dist_refactor/src/compiler.png
--- a/doc/design/dist_refactor/src/dist-graph.graffle
+++ b/doc/design/dist_refactor/src/dist-graph.graffle
--- a/doc/design/dist_refactor/src/dist-graph.png
+++ b/doc/design/dist_refactor/src/dist-graph.png
--- a/doc/design/dist_refactor/src/distributed_architecture.graffle
+++ b/doc/design/dist_refactor/src/distributed_architecture.graffle
--- a/doc/design/dist_refactor/src/distributed_architecture.png
+++ b/doc/design/dist_refactor/src/distributed_architecture.png
--- a/doc/design/dist_refactor/src/local-graph.graffle
+++ b/doc/design/dist_refactor/src/local-graph.graffle
--- a/doc/design/dist_refactor/src/local-graph.png
+++ b/doc/design/dist_refactor/src/local-graph.png
--- a/doc/design/dist_refactor/src/local_architecture.graffle
+++ b/doc/design/dist_refactor/src/local_architecture.graffle
--- a/doc/design/dist_refactor/src/local_architecture.png
+++ b/doc/design/dist_refactor/src/local_architecture.png
--- a/doc/design/dist_refactor/src/multi-threads.graffle
+++ b/doc/design/dist_refactor/src/multi-threads.graffle
--- a/doc/design/dist_refactor/src/multi-threads/multi-threads@3x.png
+++ b/doc/design/dist_refactor/src/multi-threads/multi-threads@3x.png
--- a/doc/design/dist_refactor/src/multi-threads/single-thread@3x.png
+++ b/doc/design/dist_refactor/src/multi-threads/single-thread@3x.png
--- a/doc/design/dist_refactor/src/paddle-compile.graffle
+++ b/doc/design/dist_refactor/src/paddle-compile.graffle
--- a/doc/design/dist_refactor/src/paddle-compile.png
+++ b/doc/design/dist_refactor/src/paddle-compile.png
--- a/doc/design/dist_refactor/src/remote_executor.graffle
+++ b/doc/design/dist_refactor/src/remote_executor.graffle
--- a/doc/design/dist_refactor/src/remote_executor.png
+++ b/doc/design/dist_refactor/src/remote_executor.png
--- a/doc/design/fluid_dist/src/sparse_update.graffle
+++ b/doc/design/fluid_dist/src/sparse_update.graffle
--- a/doc/design/fluid_dist/src/sparse_update.png
+++ b/doc/design/fluid_dist/src/sparse_update.png
--- a/doc/v2/dev/index_cn.rst
+++ b/doc/v2/dev/index_cn.rst
@ -1,9 +1,24 @@
 开发标准
 ========
+PaddlePaddle遵守如下三个部分的代码和文档规范。
+
+PaddlePaddle使用Git做版本管理，Docker作为构建和测试环境。代码中包含了CUDA, C++, Python, Shell等多种编程语言。语言规范遵守Google C++ Style, PEP 8, 代码库中包含自动化检查工具做风格检查。代码注释需要遵守Doxygen规范，不满足风格要求的代码会编译失败。关于如何使用Git, 构建测试及代码开发, 我们提供了如下指南。

 ..  toctree::
  :maxdepth: 1

  contribute_to_paddle_cn.md
+
+PaddlePaddle面向国内外用户，包含了中文和英文两部分的文档。设计文档和issue问题描述都推荐使用英文。对于设计文档，重在问题描述，背景阐述，然后才是解决方案。文档由Sphinx生成，因此代码注释也需要符合Sphinx文档标准。推荐本地使用paddlepaddle.org工具编译生成和预览文档，请参阅如下文档。
+
+..  toctree::
+  :maxdepth: 1
+
  write_docs_cn.rst
+
+PaddlePaddle V2 使用新增Layer方式定义新的操作。组合基础API可以实现多种复杂Layer, 满足绝大多数应用。如需要定制Layer，请参阅如下文档，欢迎提交patch。
+
+..  toctree::
+  :maxdepth: 1
+
  new_layer_cn.rst
--- a/doc/v2/howto/cluster/cmd_argument_cn.md
+++ b/doc/v2/howto/cluster/cmd_argument_cn.md
@ -71,6 +71,13 @@ paddle.init(
 - trainer_id：**必选，默认0**，每个trainer的唯一ID，从0开始的整数
 - pservers：**必选，默认127.0.0.1**，当前训练任务启动的pserver的IP列表，多个IP使用“,”隔开

+```python
+trainer = paddle.trainer.SGD(..., is_local=False)
+```
+
+参数说明
+
+- is_local: **必选, 默认True**, 是否仅在本地更新参数；集群训练时需设为False，以使用PServer更新参数

 ## 准备数据集

--- a/Show More
+++ b/Show More