Disable clang-format check on a tricky part of our source code

avx_docs
Yi Wang 8 years ago
commit 35ccf9c21d

@@ -11,7 +11,7 @@ find_package(Protobuf REQUIRED)
 # Check protobuf library version.
 execute_process(COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --version
 OUTPUT_VARIABLE PROTOBUF_VERSION)
 string(REPLACE "libprotoc " "" PROTOBUF_VERSION ${PROTOBUF_VERSION})
 set(PROTOBUF_3 OFF)
@@ -169,5 +169,4 @@ add_subdirectory(paddle)
 add_subdirectory(python)
 if(WITH_DOC)
 add_subdirectory(doc)
-add_subdirectory(doc_cn)
 endif()

@@ -119,7 +119,7 @@ function(link_paddle_exe TARGET_NAME)
 ${RDMA_LD_FLAGS}
 ${RDMA_LIBS})
 endif()
 if(WITH_PYTHON)
 target_link_libraries(${TARGET_NAME}
 ${PYTHON_LIBRARIES})
@@ -136,10 +136,10 @@ function(link_paddle_exe TARGET_NAME)
 endif()
 if(WITH_GPU)
 if(NOT WITH_DSO OR WITH_METRIC)
 target_link_libraries(${TARGET_NAME}
 ${CUDNN_LIBRARY}
 ${CUDA_curand_LIBRARY})
 CUDA_ADD_CUBLAS_TO_TARGET(${TARGET_NAME})
 endif()
@@ -206,5 +206,5 @@ function(create_resources res_file output)
 # Convert hex data for C compatibility
 string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata ${filedata})
 # Append data to output file
-file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
+file(APPEND ${output} "const unsigned char ${filename}[] = {${filedata}0};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
 endfunction()

@@ -7,25 +7,50 @@ if(NOT DEFINED SPHINX_THEME_DIR)
 endif()
 # configured documentation tools and intermediate build results
-set(BINARY_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/_build")
+set(BINARY_BUILD_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_build")
 # Sphinx cache with pickled ReST documents
-set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
+set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
 # HTML output directory
-set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
+set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
 configure_file(
-"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
-"${BINARY_BUILD_DIR}/conf.py"
+"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.en.in"
+"${BINARY_BUILD_DIR_EN}/conf.py"
 @ONLY)
 sphinx_add_target(paddle_docs
 html
-${BINARY_BUILD_DIR}
-${SPHINX_CACHE_DIR}
+${BINARY_BUILD_DIR_EN}
+${SPHINX_CACHE_DIR_EN}
 ${CMAKE_CURRENT_SOURCE_DIR}
-${SPHINX_HTML_DIR})
+${SPHINX_HTML_DIR_EN})
 add_dependencies(paddle_docs
 gen_proto_py)
+# configured documentation tools and intermediate build results
+set(BINARY_BUILD_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_build")
+# Sphinx cache with pickled ReST documents
+set(SPHINX_CACHE_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/_doctrees")
+# HTML output directory
+set(SPHINX_HTML_DIR_CN "${CMAKE_CURRENT_BINARY_DIR}/cn/html")
+configure_file(
+"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.cn.in"
+"${BINARY_BUILD_DIR_CN}/conf.py"
+@ONLY)
+sphinx_add_target(paddle_docs_cn
+html
+${BINARY_BUILD_DIR_CN}
+${SPHINX_CACHE_DIR_CN}
+${CMAKE_CURRENT_SOURCE_DIR}
+${SPHINX_HTML_DIR_CN})
+add_dependencies(paddle_docs_cn
+gen_proto_py)

@@ -1,13 +1,13 @@
 Introduction to DataProvider
 ============================
 DataProvider is the module in PaddlePaddle responsible for supplying data. It loads data into memory or GPU memory so that the neural network can be trained or used for prediction. Users can customize how data is passed in through the simple Python interface `PyDataProvider2 <pydataprovider2.html>`_; for more complex needs or higher efficiency, a custom ``DataProvider`` can also be implemented on the C++ side.
 PaddlePaddle requires the user to declare, in the network configuration (trainer_config.py), which DataProvider to use, and to implement in the DataProvider how to access the training file list (train.list) or the test file list (test.list).
 - train.list and test.list are stored locally (we recommend putting them directly in the training directory and referencing them by relative path). In general, both are plain text files in which each line holds the address of one data file:
   - If the data file lives on the local disk, the address is its absolute path or a path relative to where the PaddlePaddle program runs.
   - The address can also be an hdfs file path, a database connection string, and so on.
   - Since this address is consumed by the DataProvider, how to parse it is something users must consider when writing a custom DataProvider.
 - If test.list is not set, or is set to None, no testing is performed during training; otherwise testing is performed during training according to the test method given by the command-line arguments, which helps guard against overfitting.

@@ -24,18 +24,18 @@ of 28 x 28 pixels.
 A small part of the original data as an example is shown below:
-.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_train.txt
+.. literalinclude:: src/mnist_train.txt
 Each line of the data contains two parts, separated by :code:`;`. The first part is
 the label of an image. The second part contains 28x28 pixel float values.
 Just write the path of the above data into train.list. It looks like this:
-.. literalinclude:: ../../../doc_cn/ui/data_provider/train.list
+.. literalinclude:: src/train.list
 The corresponding dataprovider is shown below:
-.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.py
+.. literalinclude:: src/mnist_provider.dict.py
 The first line imports the PyDataProvider2 package.
 The main function is the process function, which has two parameters.
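A minimal sketch of what such a :code:`process` function typically looks like, assuming the PyDataProvider2 decorator API these docs describe; the line format and field parsing are illustrative, not the repository's exact provider:

.. code-block:: python

    # Sketch only: a list-style provider for "label;pixel values" lines.
    from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value

    # Declare one 784-dim dense image vector and one integer label in [0, 10).
    @provider(input_types=[dense_vector(784), integer_value(10)])
    def process(settings, filename):
        # 'filename' is one entry taken from train.list.
        with open(filename) as f:
            for line in f:
                label, pixels = line.split(';')
                # Yield fields in the same order as input_types.
                yield [float(x) for x in pixels.split()], int(label)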
@@ -74,7 +74,7 @@ sample by using the keyword :code:`yield`.
 Only a few lines of code need to be added to the training configuration file;
 you can take this as an example.
-.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_config.py
+.. literalinclude:: src/mnist_config.py
 Here we specify the training data by :code:`train.list`, and no testing data is specified.
 The method which actually provides data is :code:`process`.
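Those few lines amount to a call like the following sketch, assuming the :code:`define_py_data_sources2` helper from :code:`trainer_config_helpers`; the module and object names are illustrative:

.. code-block:: python

    # Sketch only: wire the Python provider into the trainer configuration.
    from paddle.trainer_config_helpers import *

    define_py_data_sources2(
        train_list='train.list',  # the training file list described earlier
        test_list=None,           # no testing data is specified
        module='mnist_provider',  # Python module that defines process()
        obj='process')            # the @provider-decorated function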
@@ -83,7 +83,7 @@ Users can also use another style to provide data, which defines the
 :code:`data_layer`'s name explicitly when they `yield`. For example,
 the :code:`dataprovider` is shown below.
-.. literalinclude:: ../../../doc_cn/ui/data_provider/mnist_provider.dict.py
+.. literalinclude:: src/mnist_provider.dict.py
    :linenos:
 If the user didn't give the :code:`data_layer`'s name, PaddlePaddle will use
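A sketch of that dict style, assuming data layers named :code:`pixel` and :code:`label`; the keys must match the layer names used in the configuration:

.. code-block:: python

    # Sketch only: same provider as before, but each yield names its layer.
    from paddle.trainer.PyDataProvider2 import provider, dense_vector, integer_value

    @provider(input_types={'pixel': dense_vector(784),
                           'label': integer_value(10)})
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                label, pixels = line.split(';')
                yield {'pixel': [float(x) for x in pixels.split()],
                       'label': int(label)}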
@@ -121,11 +121,11 @@ negative sentiment (marked by 0 and 1 respectively).
 A small part of the original data as an example can be found in the path below:
-.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_train.txt
+.. literalinclude:: src/sentimental_train.txt
 The corresponding data provider can be found in the path below:
-.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_provider.py
+.. literalinclude:: src/sentimental_provider.py
 This data provider for the sequential model is a little more complex than the one
 for the MNIST dataset.
@@ -143,7 +143,7 @@ initialized. The :code:`on_init` function has the following parameters:
 To pass these parameters into the DataProvider, the following lines should be added
 to the trainer configuration file.
-.. literalinclude:: ../../../doc_cn/ui/data_provider/sentimental_config.py
+.. literalinclude:: src/sentimental_config.py
 The definition is basically the same as in the MNIST example, except:
 * Load the dictionary in this configuration (see the sketch below)
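A sketch of how such parameters are typically passed, assuming the :code:`args` keyword of :code:`define_py_data_sources2`; the dictionary file and names are illustrative:

.. code-block:: python

    # Sketch only: the trainer-config side that forwards a dictionary
    # to the provider's on_init hook.
    from paddle.trainer_config_helpers import *

    word_dict = dict()
    with open('dict.txt') as f:  # hypothetical vocabulary file
        for i, line in enumerate(f):
            word_dict[line.strip()] = i

    define_py_data_sources2(
        train_list='train.list',
        test_list=None,
        module='sentimental_provider',
        obj='process',
        # Each key arrives as a keyword argument of on_init(settings, ...).
        args={'dictionary': word_dict})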

@@ -0,0 +1,37 @@
+API
+===
+
+DataProvider API
+----------------
+
+.. toctree::
+   :maxdepth: 1
+
+   data_provider/dataprovider_cn.rst
+   data_provider/pydataprovider2_cn.rst
+
+.. _api_trainer_config:
+
+Model Config API
+----------------
+
+.. toctree::
+   :maxdepth: 1
+
+   trainer_config_helpers/optimizers.rst
+   trainer_config_helpers/data_sources.rst
+   trainer_config_helpers/layers.rst
+   trainer_config_helpers/activations.rst
+   trainer_config_helpers/poolings.rst
+   trainer_config_helpers/networks.rst
+   trainer_config_helpers/evaluators.rst
+   trainer_config_helpers/attrs.rst
+
+Applications API
+----------------
+
+.. toctree::
+   :maxdepth: 1
+
+   predict/swig_py_paddle_cn.rst

@@ -7,7 +7,7 @@ DataProvider API
 .. toctree::
    :maxdepth: 1
-   data_provider/index_en.rst
+   data_provider/dataprovider_en.rst
+   data_provider/pydataprovider2_en.rst
 .. _api_trainer_config:

@@ -34,7 +34,7 @@ PaddlePaddle wraps the commonly used prediction interfaces with swig; building produces
 Below is a snippet of prediction code that uses the mnist model for handwritten-digit recognition. The complete code is in ``src_root/doc/ui/predict/predict_sample.py``. The mnist model can be trained with the demo under the ``src_root\demo\mnist`` directory.
-.. literalinclude:: ../../../doc/ui/predict/predict_sample.py
+.. literalinclude:: src/predict_sample.py
    :language: python
    :lines: 15-18,121-136

@@ -13,7 +13,7 @@ Here is a sample python script that shows the typical prediction process for the
 MNIST classification problem. The complete sample code can be found at
 :code:`src_root/doc/ui/predict/predict_sample.py`.
-.. literalinclude:: ./predict_sample.py
+.. literalinclude:: src/predict_sample.py
    :language: python
    :lines: 15-18,90-100,101-104
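A condensed sketch of that typical flow, assuming the :code:`py_paddle` swig bindings used by :code:`predict_sample.py`; the config path, parameter directory, and input dimension are illustrative:

.. code-block:: python

    # Sketch only: load a trained model and run one forward pass.
    from py_paddle import swig_paddle, DataProviderConverter
    from paddle.trainer.PyDataProvider2 import dense_vector
    from paddle.trainer.config_parser import parse_config

    conf = parse_config('trainer_config.py', 'is_predict=1')
    swig_paddle.initPaddle('--use_gpu=0')
    network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
    network.loadParameters('./params/')  # directory with trained parameters

    converter = DataProviderConverter([dense_vector(784)])
    one_image = [0.0] * 784              # illustrative input vector
    output = network.forwardTest(converter([[one_image]]))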

@@ -62,7 +62,7 @@ source_suffix = ['.rst', '.md', '.Rmd']
 source_encoding = 'utf-8'
 # The master toctree document.
-master_doc = 'index'
+master_doc = 'index_cn'
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

@@ -63,7 +63,7 @@ source_suffix = ['.rst', '.md', '.Rmd']
 source_encoding = 'utf-8'
 # The master toctree document.
-master_doc = 'index'
+master_doc = 'index_en'
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -144,6 +144,6 @@ def setup(app):
     # no c++ API for now
     app.add_config_value('recommonmark_config', {
         'url_resolver': lambda url: github_doc_root + url,
         'enable_eval_rst': True,
     }, True)
     app.add_transform(AutoStructify)

@@ -1,5 +1,5 @@
 ####################
-PaddlePaddle FAQ
+FAQ
 ####################
 .. contents::
@@ -33,10 +33,9 @@ PyDataProvider loads data asynchronously and picks samples at random directly in memory. This
 memory pool effectively determines the granularity of shuffling. So if you shrink the pool while still requiring the data to be random,
 the best approach is to shuffle each data file once before it is read. Possible code:
-.. literalinclude:: reduce_min_pool_size.py
+.. literalinclude:: src/reduce_min_pool_size.py
-This greatly reduces memory usage and may speed up training; see the documentation `here
-<../ui/data_provider/pydataprovider2.html#provider>`_ for details.
+This greatly reduces memory usage and may speed up training; see the documentation `here <../ui/data_provider/pydataprovider2.html#provider>`_ for details.
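A sketch of the pool-size reduction just described, assuming the :code:`min_pool_size` argument of :code:`@provider`; the input type and shuffle step are illustrative:

.. code-block:: python

    # Sketch only: keep the candidate pool small and shuffle each file
    # before yielding from it.
    import random
    from paddle.trainer.PyDataProvider2 import provider, dense_vector

    @provider(input_types=[dense_vector(10)], min_pool_size=128)
    def process(settings, filename):
        with open(filename) as f:
            lines = f.readlines()
        random.shuffle(lines)  # per-file shuffle before reading
        for line in lines:
            yield [float(x) for x in line.split()]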
 Memory used by neuron activations
 +++++++++++++++++++++++++++++++++
@@ -76,7 +75,7 @@ PaddlePaddle supports a great many optimization algorithms (Optimizer); different optimizers need
 When using :code:`pydataprovider`, you can shrink the cache pool and enable in-memory caching, which greatly speeds up data loading.
 Shrinking the :code:`DataProvider` cache pool works on the same principle as the memory-saving cache-pool reduction described earlier.
-.. literalinclude:: reduce_min_pool_size.py
+.. literalinclude:: src/reduce_min_pool_size.py
 The :code:`@provider` decorator also has a :code:`cache` parameter that controls the caching strategy. Setting it to :code:`CacheType.CACHE_PASS_IN_MEM` caches the data generated in the first :code:`pass` (one pass is one sweep over all the training data) in memory; later passes read directly from this in-memory cache instead of from the :code:`python` side, which also greatly reduces data-loading time.
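A sketch of that pass-level cache, assuming :code:`CacheType` from PyDataProvider2; the input type is illustrative:

.. code-block:: python

    # Sketch only: the first pass reads from Python and fills the cache;
    # later passes are served from memory.
    from paddle.trainer.PyDataProvider2 import provider, dense_vector, CacheType

    @provider(input_types=[dense_vector(10)],
              cache=CacheType.CACHE_PASS_IN_MEM)
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                yield [float(x) for x in line.split()]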
@@ -90,11 +89,11 @@ PaddlePaddle supports sparse training; sparse training requires the training features to be :code:`spa
 using the two words before and the two words after a word to predict the word in the middle. The DataProvider for this task is\:
-.. literalinclude:: word2vec_dataprovider.py
+.. literalinclude:: src/word2vec_dataprovider.py
 The configuration for this task is\:
-.. literalinclude:: word2vec_config.py
+.. literalinclude:: src/word2vec_config.py
 For more on sparse training, please refer to the `sparse training documentation <TBD>`_
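A sketch of such a word2vec-style provider, assuming :code:`integer_value_sequence` for the context window; the vocabulary size and parsing are illustrative:

.. code-block:: python

    # Sketch only: yield a four-word context window plus the middle word.
    from paddle.trainer.PyDataProvider2 import (provider, integer_value,
                                                integer_value_sequence)

    DICT_SIZE = 10000  # hypothetical vocabulary size

    @provider(input_types=[integer_value_sequence(DICT_SIZE),
                           integer_value(DICT_SIZE)])
    def process(settings, filename):
        with open(filename) as f:
            for line in f:
                ids = [int(w) for w in line.split()]  # pre-tokenized word ids
                for i in range(2, len(ids) - 2):
                    context = ids[i - 2:i] + ids[i + 1:i + 3]
                    yield context, ids[i]  # two words each side -> middle word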
@@ -158,7 +157,7 @@ PaddlePaddle uses the name :code:`name` as a parameter's ID; parameters with the same name
 Here :code:`hidden_a` and :code:`hidden_b` use the same parameter and bias, and the two inputs of the softmax layer also use the same parameter :code:`softmax_param`.
 7. *-cp27mu-linux_x86_64.whl is not a supported wheel on this platform.
------------------------------------------------------------------------
+---------------------------------------------------------------------------
 The main cause of this problem is that the :code:`wheel` package used when the wheel was built was up to date, while the system's :code:`pip` package is relatively old. The fix is to update the :code:`pip` package and rebuild PaddlePaddle.
@@ -220,7 +219,7 @@ PaddlePaddle uses the name :code:`name` as a parameter's ID; parameters with the same name
 10. When building from source with CMake, the PythonLibs and PythonInterp versions found do not match
-----------------------------------------------------------
+----------------------------------------------------------------
 This is a flaw in CMake's current logic for locating Python: if multiple Python versions are installed on the system, the Python library and the Python interpreter that CMake finds may be of different versions, causing the PaddlePaddle build to fail. The correct fix is to force a specific Python version, as follows:
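A plausible form of that cmake invocation, reconstructed from the placeholder names quoted below together with CMake's standard FindPythonInterp/FindPythonLibs variables; it is not the file's verbatim contents:

.. code-block:: bash

    cmake .. -DPYTHON_EXECUTABLE=<exc_path> \
             -DPYTHON_LIBRARY=<lib_path> \
             -DPYTHON_INCLUDE_DIR=<inc_path>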
@@ -231,7 +230,7 @@ PaddlePaddle uses the name :code:`name` as a parameter's ID; parameters with the same name
 where the user supplies the local Python paths ``<exc_path>``, ``<lib_path>``, ``<inc_path>``.
 10. A protocol message was rejected because it was too big
 ----------------------------------------------------------
 If the following error appears while training an NLP-related model:

