Merge remote branch 'origin/develop' into fix_data_sources

avx_docs
wangyanfei01 9 years ago
commit 7598846cf5

@ -11,7 +11,7 @@ find_package(Protobuf REQUIRED)
# Check protobuf library version.
execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
OUTPUT_VARIABLE PROTOBUF_VERSION)
string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
set(PROTOBUF_3 OFF)
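For reference, `protoc --version` prints a string like "libprotoc 3.1.0", so stripping the "libprotoc " prefix leaves a bare version number. A rough Python equivalent of this check, for illustration only (it assumes protoc is on PATH):

    import subprocess

    # Emulates the CMake snippet above: strip the "libprotoc " prefix from
    # the `protoc --version` output to obtain the bare version string.
    out = subprocess.check_output(["protoc", "--version"]).decode()
    protobuf_version = out.replace("libprotoc ", "").strip()
    # Approximates the PROTOBUF_3 switch (the full CMake logic is not shown here).
    protobuf_3 = protobuf_version.startswith("3.")
    print(protobuf_version, protobuf_3)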
@ -25,8 +25,8 @@ find_package(ZLIB REQUIRED)
find_package(NumPy REQUIRED)
find_package(Threads REQUIRED)
find_package(AVX QUIET)
find_package(Glog)
find_package(Gflags QUIET)
find_package(Glog REQUIRED)
find_package(Gflags REQUIRED)
find_package(GTest)
find_package(Sphinx)
find_package(Doxygen)
@ -40,8 +40,6 @@ option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND})
option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND})
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
@ -136,16 +134,12 @@ else(WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
if(WITH_GLOG)
add_definitions(-DPADDLE_USE_GLOG)
include_directories(${LIBGLOG_INCLUDE_DIR})
endif()
# glog
include_directories(${LIBGLOG_INCLUDE_DIR})
if(WITH_GFLAGS)
add_definitions(-DPADDLE_USE_GFLAGS)
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
include_directories(${GFLAGS_INCLUDE_DIRS})
endif()
#gflags
add_definitions(-DGFLAGS_NS=${GFLAGS_NAMESPACE})
include_directories(${GFLAGS_INCLUDE_DIRS})
if(WITH_TESTING)
enable_testing()
@ -169,5 +163,4 @@ add_subdirectory(paddle)
add_subdirectory(python)
if(WITH_DOC)
add_subdirectory(doc)
add_subdirectory(doc_cn)
endif()

@ -0,0 +1 @@
./doc/howto/contribute_to_paddle_en.md

@ -3,7 +3,7 @@ http_archive(
name="protobuf",
url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix="protobuf-3.1.0", )
strip_prefix="protobuf-3.1.0")
# External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
@ -12,4 +12,20 @@ new_http_archive(
url="https://github.com/google/googletest/archive/release-1.7.0.zip",
sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
build_file="third_party/gtest.BUILD",
strip_prefix="googletest-release-1.7.0", )
strip_prefix="googletest-release-1.7.0")
# External dependency to gflags. This method comes from
# https://github.com/gflags/example/blob/master/WORKSPACE.
new_git_repository(
name="gflags",
tag="v2.2.0",
remote="https://github.com/gflags/gflags.git",
build_file="third_party/gflags.BUILD")
# External dependency to glog. This method comes from
# https://github.com/reyoung/bazel_playground/blob/master/WORKSPACE
new_git_repository(
name="glog",
remote="https://github.com/google/glog.git",
commit="b6a5e0524c28178985f0d228e9eaa43808dbec3c",
build_file="third_party/glog.BUILD")

@ -72,6 +72,7 @@ function( Sphinx_add_target target_name builder conf cache source destination )
${source}
${destination}
COMMENT "Generating sphinx documentation: ${builder}"
COMMAND ln -s ${destination}/index_*.html ${destination}/index.html
)
set_property(
@ -143,4 +144,4 @@ function( Sphinx_add_targets target_base_name conf source base_destination )
add_dependencies( ${target_base_name}_linkcheck ${_dependencies} )
endif()
endfunction()

@ -14,13 +14,9 @@ if(WITH_STYLE_CHECK)
find_package(PythonInterp REQUIRED)
endif()
if(WITH_GLOG)
find_package(Glog REQUIRED)
endif()
find_package(Glog REQUIRED)
if(WITH_GFLAGS)
find_package(Gflags REQUIRED)
endif()
find_package(Gflags REQUIRED)
if(WITH_TESTING)
find_package(GTest REQUIRED)

@ -65,7 +65,7 @@ endmacro()
# link_paddle_exe
# add paddle library for a paddle executable, such as trainer, pserver.
#
# It will handle WITH_PYTHON/WITH_GLOG etc.
# It will handle WITH_PYTHON etc.
function(link_paddle_exe TARGET_NAME)
if(WITH_RDMA)
generate_rdma_links()
@ -108,6 +108,8 @@ function(link_paddle_exe TARGET_NAME)
paddle_cuda
${METRIC_LIBS}
${PROTOBUF_LIBRARY}
${LIBGLOG_LIBRARY}
${GFLAGS_LIBRARIES}
${CMAKE_THREAD_LIBS_INIT}
${CBLAS_LIBS}
${ZLIB_LIBRARIES}
@ -119,27 +121,17 @@ function(link_paddle_exe TARGET_NAME)
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES})
endif()
if(WITH_GLOG)
target_link_libraries(${TARGET_NAME}
${LIBGLOG_LIBRARY})
endif()
if(WITH_GFLAGS)
target_link_libraries(${TARGET_NAME}
${GFLAGS_LIBRARIES})
endif()
if(WITH_GPU)
if(NOT WITH_DSO OR WITH_METRIC)
target_link_libraries(${TARGET_NAME}
${CUDNN_LIBRARY}
${CUDA_curand_LIBRARY})
CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
endif()
@ -206,5 +198,5 @@ function(create_resources res_file output)
# Convert hex data for C compatibility
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
# Append data to output file
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
endfunction()
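The notable change in this hunk is the 0 appended after ${filedata}: it NUL-terminates the generated array so the embedded resource can also be used as a C string. A small Python sketch, for illustration only, of what the function emits for a file containing the two bytes "Hi":

    # Emulates the C array that create_resources generates for the bytes "Hi".
    data = b"Hi"
    filedata = "".join("0x%02x," % byte for byte in data)
    print("const unsigned char res[] = {%s0};" % filedata)
    # -> const unsigned char res[] = {0x48,0x69,0};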

@ -43,13 +43,13 @@ def extract_dict_features(pair_file, feature_file):
mark[verb_index] = 1
ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 2:
if verb_index < len(labels_list) - 1:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels_list) - 3:
if verb_index < len(labels_list) - 2:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
else:
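The two changed conditions fix an off-by-one error: under the old tests, a verb in the second-to-last (or third-to-last) position was treated as having no right-hand context even though one exists. A minimal sketch with made-up data:

    # Hypothetical 5-token sentence with the verb at index 3.
    sentence_list = ['The', 'cat', 'kept', 'chasing', 'mice']
    labels_list = ['B-A0', 'I-A0', 'O', 'B-V', 'B-A1']  # same length as the sentence
    verb_index = 3
    # Old test: 3 < len - 2 == 3 is False, so ctx_p1 wrongly became 'eos'.
    # New test: 3 < len - 1 == 4 is True, so the real neighbor token is used.
    ctx_p1 = sentence_list[verb_index + 1] if verb_index < len(labels_list) - 1 else 'eos'
    print(ctx_p1)  # -> 'mice'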

@ -7,25 +7,50 @@ if(NOT DEFINED SPHINX_THEME_DIR)
endif()
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
set(BINARY_BUILD_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_build")
# Sphinx cache with pickled ReST documents
set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
# HTML output directory
set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
"${BINARY_BUILD_DIR}/conf.py"
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.en.in"
"${BINARY_BUILD_DIR_EN}/conf.py"
@ONLY)
sphinx_add_target(paddle_docs
html
${BINARY_BUILD_DIR}
${SPHINX_CACHE_DIR}
${BINARY_BUILD_DIR_EN}
${SPHINX_CACHE_DIR_EN}
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR})
${SPHINX_HTML_DIR_EN})
add_dependencies(paddle_docs
gen_proto_py)
# configured documentation tools and intermediate build results
set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
# Sphinx cache with pickled ReST documents
set(SPHINX_CACHE_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/html")
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.cn.in"
"${BINARY_BUILD_DIR_CN}/conf.py"
@ONLY)
sphinx_add_target(paddle_docs_cn
html
${BINARY_BUILD_DIR_CN}
${SPHINX_CACHE_DIR_CN}
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR_CN})
add_dependencies(paddle_docs_cn
gen_proto_py)

@ -0,0 +1,11 @@
About PaddlePaddle
==================
PaddlePaddle is a parallel, distributed deep learning platform originally developed jointly by Baidu scientists and engineers. It combines ease of use, efficiency, flexibility, and scalability, and is already in wide use across many Baidu product lines.
PaddlePaddle is now open source, but it is still far from complete; we hope to keep improving, extending, and building on this foundation.
At the same time, we hope developers will actively provide feedback and contribute source code, so that an active open-source community grows around the project.
Acknowledgments
---------------
Special thanks to [all contributors](https://github.com/PaddlePaddle/Paddle/graphs/contributors) to PaddlePaddle.

@ -1,13 +1,13 @@
Introduction to DataProvider
============================
DataProvider is the PaddlePaddle module responsible for supplying data. Its job is to load data into memory or GPU memory so that the neural network can train or predict on it. Users can customize how data is fed through the simple Python interface `PyDataProvider2 <pydataprovider2.html>`_; for more complex uses, or for higher efficiency, users can also implement a custom ``DataProvider`` in C++.
PaddlePaddle requires the user to declare in the network configuration (trainer_config.py) which DataProvider to use, and to implement in the DataProvider how to access the training file list (train.list) or the test file list (test.list):
- train.list and test.list are stored locally (we recommend placing them directly in the training directory and referring to them by relative paths). In general both are plain text files, where each line holds the address of one data file (a minimal example follows this list):
  - If the data file lives on local disk, the address is its absolute path or a path relative to where the PaddlePaddle program runs.
  - The address may also be an HDFS file path, a database connection string, and so on.
  - Since this address is consumed by the DataProvider, how to parse it is something to consider when implementing a custom DataProvider.
- If test.list is not set, or is set to None, no testing is performed during training; otherwise, testing is carried out during training according to the test method specified on the command line, which helps guard against overfitting.
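For illustration, a minimal train.list might look like the following (the file names and the HDFS URI are made up):

    demo/data/batch-00000
    demo/data/batch-00001
    hdfs://namenode:9000/user/demo/train/batch-00002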

@ -1,4 +1,4 @@
.. _api_pydataprovider2_en:
.. _api_pydataprovider2:
PyDataProvider2
===============
@ -24,18 +24,18 @@ of 28 x 28 pixels.
A small part of the original data is shown below as an example:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt
.. literalinclude:: src/mnist_train.txt
Each line of the data contains two parts, separated by :code:`;`. The first part is
the label of an image. The second part contains the 28x28 pixel float values.
Just write the path of the above data into train.list. It looks like this:
.. literalinclude:: ../../../doc_cn/ui/data_provider/train.list
.. literalinclude:: src/train.list
The corresponding dataprovider is shown as below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.py
.. literalinclude:: src/mnist_provider.dict.py
The first line imports the PyDataProvider2 package.
The main function is the process function, which has two parameters.
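As a concrete illustration of the process function described here, a minimal sketch assuming the :code:`@provider` decorator API of PyDataProvider2; it parses the ;-separated format described above and is a sketch, not the repository's mnist_provider.py:

    from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value

    # A hedged sketch of a provider for the "label;784 pixel floats" format.
    @provider(input_types=[dense_vector(784), integer_value(10)])
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                label, pixels = line.split(';')
                yield [float(x) for x in pixels.split()], int(label)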
@ -74,7 +74,7 @@ sample by using the keyword :code:`yield`.
Only a few lines of code need to be added to the training configuration file;
you can take this as an example.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py
.. literalinclude:: src/mnist_config.py
Here we specify the training data via :code:`train.list`; no testing data is specified.
The method that actually provides data is :code:`process`.
@ -83,7 +83,7 @@ Users can also use another style to provide data, which defines the
:code:`data_layer`'s name explicitly when `yield` is called. For example,
the :code:`dataprovider` is shown below.
.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py
.. literalinclude:: src/mnist_provider.dict.py
:linenos:
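A hedged sketch of this dict-style provider (the layer names 'pixel' and 'label' are assumptions; src/mnist_provider.dict.py included above is the authoritative version):

    from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value

    # Dict-style provider: the yielded keys name the data_layers explicitly.
    @provider(input_types={'pixel': dense_vector(784),
                           'label': integer_value(10)})
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                label, pixels = line.split(';')
                yield {'pixel': [float(x) for x in pixels.split()],
                       'label': int(label)}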
If the user didn't give the :code:`data_layer`'s name, PaddlePaddle will use
@ -104,7 +104,7 @@ And PaddlePaddle will do all of the rest\:
Is this cool?
.. _api_pydataprovider2_en_sequential_model:
.. _api_pydataprovider2_sequential_model:
DataProvider for the sequential model
-------------------------------------
@ -121,11 +121,11 @@ negative sentiment (marked by 0 and 1 respectively).
As an example, a small part of the original data can be found at the path below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_train.txt
.. literalinclude:: src/sentimental_train.txt
The corresponding data provider can be found in the path below:
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_provider.py
.. literalinclude:: src/sentimental_provider.py
This data provider for the sequential model is a little more complex than the one
for the MNIST dataset.
@ -143,7 +143,7 @@ initialized. The :code:`on_init` function has the following parameters:
To pass these parameters into the DataProvider, the following lines should be added
to the trainer configuration file.
.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_config.py
.. literalinclude:: src/sentimental_config.py
The definition is basically the same as in the MNIST example, except:
* Load the dictionary in this configuration (see the sketch below)
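A minimal sketch of passing such parameters from the trainer configuration; the module name and the dictionary contents are made up, and :code:`args` is forwarded to the provider's :code:`on_init` as keyword arguments:

    from paddle.trainer_config_helpers import define_py_data_sources2

    # Hypothetical dictionary and provider module, for illustration only.
    word_dict = {'the': 0, 'cat': 1, 'sat': 2}
    define_py_data_sources2(
        train_list='train.list',
        test_list=None,
        module='sentimental_provider',
        obj='process',
        args={'dictionary': word_dict})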

@ -0,0 +1,37 @@
API
===
DataProvider API
----------------
.. toctree::
:maxdepth: 1
data_provider/dataprovider_cn.rst
data_provider/pydataprovider2_cn.rst
.. _api_trainer_config:
Model Config API
----------------
.. toctree::
:maxdepth: 1
trainer_config_helpers/optimizers.rst
trainer_config_helpers/data_sources.rst
trainer_config_helpers/layers.rst
trainer_config_helpers/activations.rst
trainer_config_helpers/poolings.rst
trainer_config_helpers/networks.rst
trainer_config_helpers/evaluators.rst
trainer_config_helpers/attrs.rst
Applications API
----------------
.. toctree::
:maxdepth: 1
predict/swig_py_paddle_cn.rst

@ -7,7 +7,7 @@ DataProvider API
.. toctree::
:maxdepth: 1
data_provider/index_en.rst
data_provider/dataprovider_en.rst
data_provider/pydataprovider2_en.rst
.. _api_trainer_config:

@ -34,7 +34,7 @@ PaddlePaddle wraps the common prediction interfaces with SWIG; building them produces
Below is a snippet of prediction code that uses the mnist model for handwritten digit recognition. The complete code can be found in ``src_root/doc/ui/predict/predict_sample.py``. The mnist model can be trained with the demo under the ``src_root/demo/mnist`` directory.
.. literalinclude:: ../../../doc/ui/predict/predict_sample.py
.. literalinclude:: src/predict_sample.py
:language: python
:lines: 15-18,121-136

@ -13,7 +13,7 @@ Here is a sample python script that shows the typical prediction process for the
MNIST classification problem. A complete code sample can be found at
:code:`src_root/doc/ui/predict/predict_sample.py`.
.. literalinclude:: ./predict_sample.py
.. literalinclude:: src/predict_sample.py
:language: python
:lines: 15-18,90-100,101-104
@ -23,7 +23,7 @@ python's :code:`help()` function. Let's walk through the above python script:
* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize
PaddlePaddle with command line arguments; for more about command line arguments,
see :ref:`cmd_detail_introduction_en` .
see :ref:`cmd_detail_introduction` .
* Parse the configuration file that was used in training with :code:`parse_config()`.
Because the data to be predicted usually has no label, and the output of prediction
is normally the output layer rather than the cost layer, you should modify
@ -36,7 +36,7 @@ python's :code:`help()` function. Let's walk through the above python script:
- Note: As swig_paddle can only accept C++ matrices, we offer a utility
class DataProviderConverter that accepts the same input data as
PyDataProvider2; for more information please refer to the documentation
of :ref:`api_pydataprovider2_en` .
of :ref:`api_pydataprovider2` .
* Do the prediction with :code:`forwardTest()`, which takes the converted
input data and outputs the activations of the output layer.
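Putting the steps above together, a hedged sketch of the whole flow; the config path, model directory, input width, and the is_predict flag are assumptions rather than excerpts from predict_sample.py:

    from py_paddle import swig_paddle, DataProviderConverter
    from paddle.trainer.PyDataProvider2 import dense_vector
    from paddle.trainer.config_parser import parse_config

    swig_paddle.initPaddle("--use_gpu=0")                     # init with CLI args
    conf = parse_config("trainer_config.py", "is_predict=1")  # training config
    net = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
    net.loadParameters("./output/pass-00099/")                # trained parameters
    converter = DataProviderConverter([dense_vector(784)])    # same types as PyDataProvider2
    inputs = [[[0.0] * 784]]                                  # one all-zero sample
    outputs = net.forwardTest(converter(inputs))              # forward pass
    print(outputs[0]['value'])                                # output-layer activations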

Some files were not shown because too many files have changed in this diff.
