Merge branch 'develop' of github.com:baidu/Paddle into feature/upgrade_to_proto3

9 years ago · 79611a2794
parent 0c65442c5b 438a4ec6d6
commit 79611a2794
149 changed files with 1239 additions and 1643 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -25,8 +25,8 @@ find_package(ZLIB REQUIRED)
 find_package(NumPy REQUIRED)
 find_package(Threads REQUIRED)
 find_package(AVX QUIET)
-find_package(Glog)
-find_package(Gflags QUIET)
+find_package(Glog REQUIRED)
+find_package(Gflags REQUIRED)
 find_package(GTest)
 find_package(Sphinx)
 find_package(Doxygen)
@ -40,8 +40,6 @@ option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
 option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
 option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
 option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
-option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND})
-option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND})
 option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
 option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
 option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
@ -136,16 +134,12 @@ else(WITH_RDMA)
  add_definitions(-DPADDLE_DISABLE_RDMA)
 endif(WITH_RDMA)

-if(WITH_GLOG)
-    add_definitions(-DPADDLE_USE_GLOG)
-    include_directories(${LIBGLOG_INCLUDE_DIR})
-endif()
+# glog
+include_directories(${LIBGLOG_INCLUDE_DIR})

-if(WITH_GFLAGS)
-    add_definitions(-DPADDLE_USE_GFLAGS)
-    add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
-    include_directories(${GFLAGS_INCLUDE_DIRS})
-endif()
+#gflags
+add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
+include_directories(${GFLAGS_INCLUDE_DIRS})

 if(WITH_TESTING)
    enable_testing()
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -0,0 +1 @@
+./doc/howto/dev/contribute_to_paddle_en.md
--- a/WORKSPACE
+++ b/WORKSPACE
@ -3,7 +3,7 @@ http_archive(
    name="protobuf",
    url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
    sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
-    strip_prefix="protobuf-3.1.0", )
+    strip_prefix="protobuf-3.1.0")

 # External dependency to gtest 1.7.0.  This method comes from
 # https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
@ -12,4 +12,20 @@ new_http_archive(
    url="https://github.com/google/googletest/archive/release-1.7.0.zip",
    sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
    build_file="third_party/gtest.BUILD",
-    strip_prefix="googletest-release-1.7.0", )
+    strip_prefix="googletest-release-1.7.0")
+
+# External dependency to gflags.  This method comes from
+# https://github.com/gflags/example/blob/master/WORKSPACE.
+new_git_repository(
+    name="gflags",
+    tag="v2.2.0",
+    remote="https://github.com/gflags/gflags.git",
+    build_file="third_party/gflags.BUILD")
+
+# External dependency to glog.  This method comes from
+# https://github.com/reyoung/bazel_playground/blob/master/WORKSPACE
+new_git_repository(
+    name="glog",
+    remote="https://github.com/google/glog.git",
+    commit="b6a5e0524c28178985f0d228e9eaa43808dbec3c",
+    build_file="third_party/glog.BUILD")
--- a/cmake/FindSphinx.cmake
+++ b/cmake/FindSphinx.cmake
@ -72,7 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
    ${source}
    ${destination}
    COMMENT "Generating sphinx documentation: ${builder}"
-    COMMAND ln -s ${destination}/index_*.html ${destination}/index.html
+    COMMAND ln -sf ${destination}/index_*.html ${destination}/index.html
    )

  set_property(
--- a/cmake/check_packages.cmake
+++ b/cmake/check_packages.cmake
@ -14,13 +14,9 @@ if(WITH_STYLE_CHECK)
  find_package(PythonInterp REQUIRED)
 endif()

-if(WITH_GLOG)
-  find_package(Glog REQUIRED)
-endif()
+find_package(Glog REQUIRED)

-if(WITH_GFLAGS)
-  find_package(Gflags REQUIRED)
-endif()
+find_package(Gflags REQUIRED)

 if(WITH_TESTING)
  find_package(GTest REQUIRED)
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@ -65,7 +65,7 @@ endmacro()
 # link_paddle_exe
 # add paddle library for a paddle executable, such as trainer, pserver.
 #
-# It will handle WITH_PYTHON/WITH_GLOG etc.
+# It will handle WITH_PYTHON etc.
 function(link_paddle_exe TARGET_NAME)
    if(WITH_RDMA)
        generate_rdma_links()
@ -108,6 +108,8 @@ function(link_paddle_exe TARGET_NAME)
        paddle_cuda
        ${METRIC_LIBS}
        ${PROTOBUF_LIBRARY}
+        ${LIBGLOG_LIBRARY}
+        ${GFLAGS_LIBRARIES}
        ${CMAKE_THREAD_LIBS_INIT}
        ${CBLAS_LIBS}
        ${ZLIB_LIBRARIES}
@ -125,16 +127,6 @@ function(link_paddle_exe TARGET_NAME)
            ${PYTHON_LIBRARIES})
    endif()

-    if(WITH_GLOG)
-        target_link_libraries(${TARGET_NAME}
-            ${LIBGLOG_LIBRARY})
-    endif()
-
-    if(WITH_GFLAGS)
-        target_link_libraries(${TARGET_NAME}
-            ${GFLAGS_LIBRARIES})
-    endif()
-
    if(WITH_GPU)
        if(NOT WITH_DSO OR WITH_METRIC)
            target_link_libraries(${TARGET_NAME}
--- a/demo/semantic_role_labeling/data/extract_dict_feature.py
+++ b/demo/semantic_role_labeling/data/extract_dict_feature.py
@ -43,13 +43,13 @@ def extract_dict_features(pair_file, feature_file):
            mark[verb_index] = 1
            ctx_0 = sentence_list[verb_index]

-            if verb_index < len(labels_list) - 2:
+            if verb_index < len(labels_list) - 1:
                mark[verb_index + 1] = 1
                ctx_p1 = sentence_list[verb_index + 1]
            else:
                ctx_p1 = 'eos'

-            if verb_index < len(labels_list) - 3:
+            if verb_index < len(labels_list) - 2:
                mark[verb_index + 2] = 1
                ctx_p2 = sentence_list[verb_index + 2]
            else:
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@ -16,7 +16,7 @@ set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
 set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")

 configure_file(
-    "${CMAKE_CURRENT_SOURCE_DIR}/conf.py.en.in"
+    "${CMAKE_CURRENT_SOURCE_DIR}/templates/conf.py.en.in"
    "${BINARY_BUILD_DIR_EN}/conf.py"
    @ONLY)

@ -41,7 +41,7 @@ set(SPHINX_CACHE_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_doctrees")
 set(SPHINX_HTML_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/html")

 configure_file(
-    "${CMAKE_CURRENT_SOURCE_DIR}/conf.py.cn.in"
+    "${CMAKE_CURRENT_SOURCE_DIR}/templates/conf.py.cn.in"
    "${BINARY_BUILD_DIR_CN}/conf.py"
    @ONLY)

--- a/doc/about/index_cn.md
+++ b/doc/about/index_cn.md
@ -0,0 +1,11 @@
+关于PaddlePaddle
+================
+
+PaddlePaddle是一个最早由百度科学家和工程师共同研发的并行分布式深度学习平台，兼备易用性、高效性、灵活性和可扩展性，目前已被百度内部多个产品线广泛使用。
+PaddlePaddle目前已经开放源码, 但是远未完善，我们希望能在这个基础上不断的改进、扩展和延伸。
+同时我们希望广大开发者积极提供反馈和贡献源代码，建立一个活跃的开源社区。
+
+致谢
+--------
+
+在此，特别感谢PaddlePaddle的[所有贡献者](https://github.com/PaddlePaddle/Paddle/graphs/contributors)。
--- a/doc/about/index_en.rst
+++ b/doc/about/index_en.rst
@ -11,4 +11,4 @@ We hope to build an active open source community both by providing feedback and
 Credits
 --------

-We owe many thanks to `all contributors and developers <https://github.com/PaddlePaddle/Paddle/blob/develop/authors>`_ of PaddlePaddle!
+We owe many thanks to `all contributors and developers <https://github.com/PaddlePaddle/Paddle/graphs/contributors>`_ of PaddlePaddle!
--- a/doc/api/index_cn.rst
+++ b/doc/api/index_cn.rst
@ -1,5 +1,5 @@
-API
-===
+API中文手册
+============

 DataProvider API
 ----------------
--- a/doc/getstarted/basic_usage/index_cn.rst
+++ b/doc/getstarted/basic_usage/index_cn.rst
@ -1,16 +1,16 @@
-简介
-====
+经典的线性回归任务
+==================

 PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍将向你展示如何利用PaddlePaddle来解决一个经典的线性回归问题。

-1. 一个经典的任务
-----------------
+任务简介
+--------

 我们展示如何用PaddlePaddle解决 `单变量的线性回归 <https://www.baidu.com/s?wd=单变量线性回归>`_ 问题。线性回归的输入是一批点 `(x, y)` ，其中 `y = wx + b + ε`， 而 ε 是一个符合高斯分布的随机变量。线性回归的输出是从这批点估计出来的参数 `w` 和 `b` 。

 一个例子是房产估值。我们假设房产的价格（y）是其大小（x）的一个线性函数，那么我们可以通过收集市场上房子的大小和价格，用来估计线性函数的参数w 和 b。

-2. 准备数据
+准备数据
 -----------

 假设变量 `x` 和 `y` 的真实关系为： `y = 2x + 0.3 + ε`，这里展示如何使用观测数据来拟合这一线性关系。首先，Python代码将随机产生2000个观测点，作为线性回归的输入。下面脚本符合PaddlePaddle期待的读取数据的Python程序的模式。
@ -28,7 +28,7 @@ PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍
            x = random.random()
            yield [x], [2*x+0.3]

-3. 训练模型
+训练模型
 -----------

 为了还原 `y = 2x + 0.3`，我们先从一条随机的直线 `y' = wx + b` 开始，然后利用观测数据调整 `w` 和 `b` 使得 `y'` 和 `y` 的差距不断减小，最终趋于接近。这个过程就是模型的训练过程，而 `w` 和 `b` 就是模型的参数，即我们的训练目标。
@ -79,7 +79,7 @@ PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍

 PaddlePaddle将在观测数据集上迭代训练30轮，并将每轮的模型结果存放在 `./output` 路径下。从输出日志可以看到，随着轮数增加误差代价函数的输出在不断的减小，这意味着模型在训练数据上不断的改进，直到逼近真实解：` y = 2x + 0.3 `

-4. 模型检验
+模型检验
 -----------

 训练完成后，我们希望能够检验模型的好坏。一种常用的做法是用学习的模型对另外一组测试数据进行预测，评价预测的效果。在这个例子中，由于已经知道了真实答案，我们可以直接观察模型的参数是否符合预期来进行检验。
@ -106,10 +106,3 @@ PaddlePaddle将每个模型参数作为一个numpy数组单独存为一个文件
 从图中可以看到，虽然 `w` 和 `b` 都使用随机值初始化，但在起初的几轮训练中它们都在快速逼近真实值，并且后续仍在不断改进，使得最终得到的模型几乎与真实模型一致。

 这样，我们用PaddlePaddle解决了单变量线性回归问题， 包括数据输入、模型训练和最后的结果验证。
-
-5. 推荐后续阅读
---------------
-
- `安装/编译 <../build_and_install/index.html>`_ ：PaddlePaddle的安装与编译文档。
- `快速入门 <../demo/quick_start/index.html>`_ ：使用商品评论分类任务，系统性的介绍如何一步步改进，最终得到产品级的深度模型。
- `示例 <../demo/index.html>`_ ：各种实用案例，涵盖图像、文本、推荐等多个领域。
--- a/doc/getstarted/basic_usage/index_en.rst
+++ b/doc/getstarted/basic_usage/index_en.rst
@ -1,15 +1,15 @@
-Basic Usage
-=============
+Simple Linear Regression
+========================

 PaddlePaddle is a deep learning platform open-sourced by Baidu. With PaddlePaddle, you can easily train a classic neural network within a couple lines of configuration, or you can build sophisticated models that provide state-of-the-art performance on difficult learning tasks like sentiment analysis, machine translation, image caption and so on.

-1. A Classic Problem
---------------------
+Problem Background
+------------------

 Now, to give you a hint of what using PaddlePaddle looks like, let's start with a fundamental learning problem - `simple linear regression <https://en.wikipedia.org/wiki/Simple_linear_regression>`_: you have observed a set of two-dimensional data points of ``X`` and ``Y``, where ``X`` is an explanatory variable and ``Y`` is the corresponding dependent variable, and you want to recover the underlying correlation between ``X`` and ``Y``. Linear regression can be used in many practical scenarios. For example, ``X`` can be a variable about house size, and ``Y`` a variable about house price. You can build a model that captures the relationship between them by observing real estate markets.

-2. Prepare the Data
--------------------
+Prepare the Data
+-----------------

 Suppose the true relationship can be characterized as ``Y = 2X + 0.3``, let's see how to recover this pattern only from observed data. Here is a piece of python code that feeds synthetic data to PaddlePaddle. The code is pretty self-explanatory, the only extra thing you need to add for PaddlePaddle is a definition of input data types.

@ -26,8 +26,8 @@ Suppose the true relationship can be characterized as ``Y = 2X + 0.3``, let's se
                x = random.random()
                yield [x], [2*x+0.3]

-3. Train a NeuralNetwork
-------------------------
+Train a NeuralNetwork
+----------------------

 To recover this relationship between ``X`` and ``Y``, we use a neural network with one layer of linear activation units and a square error cost layer. Don't worry if you are not familiar with these terminologies, it's just saying that we are starting from a random line ``Y' = wX + b`` , then we gradually adapt ``w`` and ``b`` to minimize the difference between ``Y'`` and ``Y``. Here is what it looks like in PaddlePaddle:

@ -73,8 +73,8 @@ Now that everything is ready, you can train the network with a simple command li
 This means that PaddlePaddle will train this network on the synthetic dataset for 30 passes, and save all the models under path ``./output``. You will see from the messages printed out during the training phase that the model cost is decreasing as time goes by, which indicates we are getting a closer guess.


-4. Evaluate the Model
-----------------------
+Evaluate the Model
+-------------------

 Usually, a different dataset that was left out during the training phase should be used to evaluate the models. However, we are lucky enough to know the real answer: ``w=2, b=0.3``, thus a better option is to check the model parameters directly.

--- a/doc/getstarted/build_and_install/build_from_source_en.md
+++ b/doc/getstarted/build_and_install/build_from_source_en.md
@ -49,10 +49,8 @@ PaddlePaddle supports some build options. To enable it, first you need to instal
 <tbody>
 <tr><td class="left">WITH_GPU</td><td class="left">Compile with GPU mode.</td></tr>
 <tr><td class="left">WITH_DOUBLE</td><td class="left">Compile with double precision floating-point, default: single precision.</td></tr>
-<tr><td class="left">WITH_GLOG</td><td class="left">Compile with glog. If not found, default: an internal log implementation.</td></tr>
-<tr><td class="left">WITH_GFLAGS</td><td class="left">Compile with gflags. If not found, default: an internal flag implementation.</td></tr>
 <tr><td class="left">WITH_TESTING</td><td class="left">Compile with gtest for PaddlePaddle's unit testing.</td></tr>
-<tr><td class="left">WITH_DOC</td><td class="left">	Compile to generate PaddlePaddle's docs, default: disabled (OFF).</td></tr>
+<tr><td class="left">WITH_DOC</td><td class="left">    Compile to generate PaddlePaddle's docs, default: disabled (OFF).</td></tr>
 <tr><td class="left">WITH_SWIG_PY</td><td class="left">Compile with python predict API, default: disabled (OFF).</td></tr>
 <tr><td class="left">WITH_STYLE_CHECK</td><td class="left">Compile with code style check, default: enabled (ON).</td></tr>
 </tbody>
--- a/doc/getstarted/build_and_install/cmake/compile_options.csv
+++ b/doc/getstarted/build_and_install/cmake/compile_options.csv
@ -6,8 +6,6 @@ WITH_AVX,是否编译含有AVX指令集的PaddlePaddle二进制文件,是
 WITH_PYTHON,是否内嵌PYTHON解释器。方便今后的嵌入式移植工作。,是
 WITH_STYLE_CHECK,是否编译时进行代码风格检查,是
 WITH_RDMA,是否开启RDMA,否
-WITH_GLOG,是否开启GLOG。如果不开启，则会使用一个简化版的日志，同时方便今后的嵌入式移植工作。,取决于是否寻找到GLOG
-WITH_GFLAGS,是否使用GFLAGS。如果不开启，则会使用一个简化版的命令行参数解析器，同时方便今后的嵌入式移植工作。,取决于是否寻找到GFLAGS
 WITH_TIMER,是否开启计时功能。如果开启会导致运行略慢，打印的日志变多，但是方便调试和测Benchmark,否
 WITH_TESTING,是否开启单元测试,取决于是否寻找到GTEST
 WITH_DOC,是否编译中英文文档,否
--- a/doc/getstarted/build_and_install/index_cn.rst
+++ b/doc/getstarted/build_and_install/index_cn.rst
@ -1,5 +1,5 @@
 编译与安装
-========================
+==========

 安装
 ++++
@ -24,4 +24,4 @@ PaddlePaddle提供数个预编译的二进制来进行安装，包括Docker镜
 ..  toctree::
    :maxdepth: 1

-    cmake/build_from_source_cn.rst
+    cmake/build_from_source_cn.rst
--- a/doc/getstarted/build_and_install/ubuntu_install_cn.rst
+++ b/doc/getstarted/build_and_install/ubuntu_install_cn.rst
@ -46,8 +46,6 @@ PaddlePaddle提供了ubuntu 14.04 deb安装包。
        with_double: OFF
        with_python: ON
        with_rdma: OFF
-        with_glog: ON
-        with_gflags: ON
        with_metric_learning:
        with_timer: OFF
        with_predict_sdk:
--- a/doc/getstarted/index_cn.rst
+++ b/doc/getstarted/index_cn.rst
@ -1,4 +1,4 @@
-GET STARTED
+新手入门
 ============

 ..  toctree::
--- a/doc/howto/concepts/nn_cn.rst
+++ b/doc/howto/concepts/nn_cn.rst
@ -1,3 +0,0 @@
-TBD
-
-目前正在书写中。敬请期待。
--- a/doc/howto/concepts/program_concepts_cn.rst
+++ b/doc/howto/concepts/program_concepts_cn.rst
@ -1,4 +0,0 @@
-TBD
-###
-
-目前正在书写中。敬请期待。
--- a/doc/howto/deep_model/index_cn.rst
+++ b/doc/howto/deep_model/index_cn.rst
@ -1,10 +0,0 @@
-How to Configure Deep Models
-============================
-
-..  toctree::
-  :maxdepth: 1
-
-  rnn/recurrent_group_cn.md
-  rnn/hierarchical_layer_cn.rst
-  rnn/hrnn_rnn_api_compare_cn.rst
-  rnn/hrnn_demo_cn.rst
--- a/doc/howto/deep_model/index_en.rst
+++ b/doc/howto/deep_model/index_en.rst
@ -1,7 +0,0 @@
-How to Configure Deep Models
-============================
-
-..  toctree::
-  :maxdepth: 1
-
-  rnn/rnn_en.rst
--- a/doc/howto/deep_model/rnn/hrnn_demo_cn.rst
+++ b/doc/howto/deep_model/rnn/hrnn_demo_cn.rst
@ -1,7 +0,0 @@
-..	_algo_hrnn_demo:
-
-#################
-双层RNN的使用示例
-#################
-
-TBD
--- a/doc/howto/deep_model/rnn/index_cn.rst
+++ b/doc/howto/deep_model/rnn/index_cn.rst
@ -0,0 +1,9 @@
+RNN相关模型
+===========
+
+..  toctree::
+  :maxdepth: 1
+
+  recurrent_group_cn.md
+  hierarchical_layer_cn.rst
+  hrnn_rnn_api_compare_cn.rst
--- a/doc/howto/deep_model/rnn/index_en.rst
+++ b/doc/howto/deep_model/rnn/index_en.rst
@ -0,0 +1,7 @@
+RNN Models
+==========
+
+..  toctree::
+  :maxdepth: 1
+
+  rnn_config_en.rst
--- a/Show More
+++ b/Show More