From 35bed7ce1cb48bd8e35e0a76fbcf7359eabc37ef Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Wed, 21 Mar 2018 19:10:52 +0800
Subject: [PATCH 01/62] Add contents for manually building documentation (cn
version)
---
doc/v2/dev/write_docs_cn.rst | 31 ++++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index a055bb04c0..674efabcef 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -64,9 +64,31 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
不使用PaddlePaddle.org工具
--------------------------
-使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即
+使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。该方法与 `从源码编译PaddlePaddle `_ 相似,通过从源码中构建可用于编译PaddlePaddle文档的Docker镜像并运行,在进入Docker容器后使用源码中的脚本构建PaddlePaddle文档,具体步骤如下:
-[TBD]
+.. code-block:: bash
+
+ mkdir paddle
+ cd paddle
+ git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
+
+ # 从源码中构建可用于编译PaddlePaddle文档的Docker镜像
+ docker build -t paddle:dev .
+ docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev /bin/bash
+
+ # 进入Docker容器后使用build.sh脚本构建PaddlePaddle文档
+ bash -x /paddle/paddle/scripts/docker/build.sh
+
+注:上述命令把当前目录(源码根目录)映射为 container 里的 :code:`/paddle` 目录。
+
+编译完成后,进入 ``paddle/build/doc/v2`` 目录,该目录下生成了 ``cn/html/`` 、 ``en/html`` 以及 ``api/en/html`` 共三个子目录,分别进入这些目录下,执行以下命令:
+
+.. code-block:: bash
+
+ python -m SimpleHTTPServer 8088
+
+在浏览器中输入http://localhost:8088就可以看到编译生成的中/英文的文档页面和英文的API页面。
如果不想使用Docker,也可以使用以下命令直接构建PaddlePaddle文档,即
@@ -75,6 +97,7 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
mkdir paddle
cd paddle
git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
@@ -96,7 +119,9 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
python -m SimpleHTTPServer 8088
-在浏览器中输入http://localhost:8088就可以看到编译生成的中/英文的文档页面和英文的API页面,下图为生成的英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
+在浏览器中输入http://localhost:8088就可以看到编译生成的中/英文的文档页面和英文的API页面。
+
+下图为生成的英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
.. image:: src/doc_en.png
:align: center
From 154a1db04916efe74baa37e06128a43787fe6716 Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Thu, 22 Mar 2018 11:21:31 +0800
Subject: [PATCH 02/62] Adjust some commands
---
doc/v2/dev/write_docs_cn.rst | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index 674efabcef..8514e635ff 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -18,9 +18,6 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
.. code-block:: bash
- mkdir paddlepaddle # Create paddlepaddle working directory
- cd paddlepaddle
-
# Clone the content repositories
git clone https://github.com/PaddlePaddle/Paddle.git
git clone https://github.com/PaddlePaddle/book.git
@@ -38,9 +35,6 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
.. code-block:: bash
- mkdir paddlepaddle # Create paddlepaddle working directory
- cd paddlepaddle
-
# Clone the content repositories and PaddlePaddle.org
git clone https://github.com/PaddlePaddle/Paddle.git
git clone https://github.com/PaddlePaddle/book.git
@@ -68,14 +62,12 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
.. code-block:: bash
- mkdir paddle
- cd paddle
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
# 从源码中构建可用于编译PaddlePaddle文档的Docker镜像
docker build -t paddle:dev .
- docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" paddle:dev /bin/bash
+ docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" -e "WITH_DOC=ON" paddle:dev /bin/bash
# 进入Docker容器后使用build.sh脚本构建PaddlePaddle文档
bash -x /paddle/paddle/scripts/docker/build.sh
@@ -94,8 +86,6 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
.. code-block:: bash
- mkdir paddle
- cd paddle
git clone https://github.com/PaddlePaddle/Paddle.git
cd Paddle
mkdir -p build
From 86626a74780176a991064ce6ea7ac5d4bd683775 Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Wed, 28 Mar 2018 15:22:43 +0800
Subject: [PATCH 03/62] Add English version
---
doc/v2/dev/write_docs_cn.rst | 6 ++++++
doc/v2/dev/write_docs_en.rst | 24 ++++++++++++++++++++++--
2 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index f18dd86b51..83d065d3bb 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -19,6 +19,9 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
.. code-block:: bash
+ mkdir paddlepaddle # Create paddlepaddle working directory
+ cd paddlepaddle
+
# Clone the content repositories
git clone https://github.com/PaddlePaddle/Paddle.git
git clone https://github.com/PaddlePaddle/book.git
@@ -36,6 +39,9 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
.. code-block:: bash
+ mkdir paddlepaddle # Create paddlepaddle working directory
+ cd paddlepaddle
+
# Clone the content repositories and PaddlePaddle.org
git clone https://github.com/PaddlePaddle/Paddle.git
git clone https://github.com/PaddlePaddle/book.git
diff --git a/doc/v2/dev/write_docs_en.rst b/doc/v2/dev/write_docs_en.rst
index 15ff0d34ad..8bc43be6de 100644
--- a/doc/v2/dev/write_docs_en.rst
+++ b/doc/v2/dev/write_docs_en.rst
@@ -68,9 +68,29 @@ Please `click here `_ on how to install Docker. After Docker is installed, you could use the scripts in the source directory to build the documentation.
+Build PaddlePaddle's documentation with Docker, you need to install Docker first. Please refer to `Docker's official website `_ on how to install Docker. This method is quite similar to `Build From Sources `_, by constructing, from source code, a docker image that can be used to build PaddlePaddle documentation. Enter the Docker container and use the script ``build.sh`` in the source directory to build the PaddlePaddle documentation. The specific steps are as follows:
-[TBD]
+.. code-block:: bash
+
+ git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
+
+ # Construct a docker image from source code
+ docker build -t paddle:dev .
+ docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" -e "WITH_DOC=ON" paddle:dev /bin/bash
+
+ # Use build.sh to build PaddlePaddle documentation
+ bash -x /paddle/paddle/scripts/docker/build.sh
+
+Note: The above command maps the current directory (source root directory) to the :code:`/paddle` directory in the container.
+
+After compiling, you could enter the ``paddle/build/doc/v2`` directory, where three subdirectories ``cn/html/``, ``en/html`` and ``api/en/html`` are generated. Please enter these directories respectively and execute the following commands:
+
+.. code-block:: bash
+
+ python -m SimpleHTTPServer 8088
+
+Use a web browser and navigate to http://localhost:8088, you could see the compiled Chinese/English documents page and the English APIs page.
If you do not wish to use Docker, you can also use the following commands to directly build the PaddlePaddle documentation.
From e1290c4fd7facfa9abfbb6e710ab3fa5f4ed3d10 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Wed, 28 Mar 2018 23:09:32 +0800
Subject: [PATCH 04/62] Make Average Model support for 'moving mean' and
 'moving variance' of batch_norm op
---
python/paddle/fluid/optimizer.py | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 180575c35d..d21320f705 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -850,23 +850,39 @@ class ModelAverage(Optimizer):
self.min_average_window = min_average_window
self.max_average_window = max_average_window
self.params_grads = params_grads
+
+ # append 'moving mean' and 'moving variance' to self.params_grads
+ pattern = re.compile(r"batch_norm_\d+\.w_[1,2]")
+ for param in framework.default_main_program().global_block(
+ ).all_parameters():
+ if pattern.match(param.name) is not None:
+ self.params_grads.append((param, None))
+ # create a tmp gradient variable to backup parameter value
+ # for parameter whose grad is None
+ for i, param_grad in enumerate(self.params_grads):
+ param, grad = param_grad
+ if grad is None:
+ grad = param.block.create_var(
+ name=unique_name.generate(".".join([param.name, 'tmp'])),
+ dtype=param.dtype,
+ persistable=False,
+ stop_gradient=stop_gradient)
+ self.params_grads[i] = (param, grad)
+
for param, grad in self.params_grads:
- if grad is not None:
- self._append_average_accumulate_op(param)
+ self._append_average_accumulate_op(param)
self.apply_program = Program()
block = self.apply_program.global_block()
with program_guard(main_program=self.apply_program):
for param_grad in self.params_grads:
- if param_grad[1] is not None:
- self._add_average_apply_op(block, param_grad)
+ self._add_average_apply_op(block, param_grad)
self.restore_program = Program()
block = self.restore_program.global_block()
with program_guard(main_program=self.restore_program):
for param_grad in self.params_grads:
- if param_grad[1] is not None:
- self._add_average_restore_op(block, param_grad)
+ self._add_average_restore_op(block, param_grad)
def _add_average_apply_op(self, block, param_grad):
param = block.clone_variable(param_grad[0])
From 62373edb0c6ad7b55ca7af5b632ecd415e9d51bb Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Fri, 30 Mar 2018 19:27:08 +0800
Subject: [PATCH 05/62] Adjust
---
doc/v2/dev/write_docs_cn.rst | 7 ++-----
doc/v2/dev/write_docs_en.rst | 11 ++++-------
2 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index 83d065d3bb..0795b2d146 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -100,13 +100,10 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
# 如果只需要构建使用文档,则执行以下命令
- make -j $processors gen_proto_py
- make -j $processors paddle_docs paddle_docs_cn
+ make -j $processors paddle_docs
# 如果只需要构建API,则执行以下命令
- make -j $processors gen_proto_py framework_py_proto
- make -j $processors copy_paddle_pybind
- make -j $processors paddle_api_docs
+ make -j $processors paddle_apis
其中$processors代表启动和CPU核一样多的进程来并行编译,可以根据本机的CPU核数设置相应的值。
diff --git a/doc/v2/dev/write_docs_en.rst b/doc/v2/dev/write_docs_en.rst
index 8bc43be6de..f03daa300f 100644
--- a/doc/v2/dev/write_docs_en.rst
+++ b/doc/v2/dev/write_docs_en.rst
@@ -96,21 +96,18 @@ If you do not wish to use Docker, you can also use the following commands to dir
.. code-block:: bash
- mkdir paddle
- cd paddle
+
git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
# If you only need to build documents, use the following commands
- make -j $processors gen_proto_py
- make -j $processors paddle_docs paddle_docs_cn
+ make -j $processors paddle_docs
# If you only need to build APIs, use the following commands
- make -j $processors gen_proto_py framework_py_proto
- make -j $processors copy_paddle_pybind
- make -j $processors paddle_api_docs
+ make -j $processors paddle_apis
$processors indicates that as many processes as the CPU cores are started to compile in parallel. It should be set according to the number of CPU cores of your machine.
From abc630ecf9e01f7c09b8833ad25fa60cb9cbc6c8 Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Fri, 30 Mar 2018 21:12:29 +0800
Subject: [PATCH 06/62] Adjust descriptions for building fluid docs and api
---
doc/v2/dev/write_docs_cn.rst | 4 ++--
doc/v2/dev/write_docs_en.rst | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index 0795b2d146..887d92942e 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -107,13 +107,13 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
其中$processors代表启动和CPU核一样多的进程来并行编译,可以根据本机的CPU核数设置相应的值。
-编译完成后,进入 ``doc/v2`` 目录,如果选择构建文档则会在该目录下生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会生成 ``api/en/html`` 目录,分别进入这些目录下,执行以下命令:
+编译完成后,会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,如果选择构建文档则会在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会在这两个目录下分别生成 ``api/en/html`` 目录,分别进入这些子目录下,执行以下命令:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-在浏览器中输入 http://localhost:8088 就可以看到编译生成的中/英文的文档页面和英文的API页面,下图为生成的英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
+在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面,下图为生成的 ``v2`` 英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
.. image:: src/doc_en.png
:align: center
diff --git a/doc/v2/dev/write_docs_en.rst b/doc/v2/dev/write_docs_en.rst
index f03daa300f..435bbdb60f 100644
--- a/doc/v2/dev/write_docs_en.rst
+++ b/doc/v2/dev/write_docs_en.rst
@@ -111,13 +111,13 @@ If you do not wish to use Docker, you can also use the following commands to dir
$processors indicates that as many processes as the CPU cores are started to compile in parallel. It should be set according to the number of CPU cores of your machine.
-After the compilation is complete, enter the ``doc/v2`` directory. If you chose to build documents, it will generate ``cn/html/`` and ``en/html`` subdirectories under this directory. If you chose to build APIs,it will generate``api/en/html`` subdirectory. Please enter these directories respectively and execute the following commands:
+After the compilation is complete, there should be two generated directories: ``doc/v2`` and ``doc/fluid`` . If you chose to build documents, two subdirectories ``cn/html/`` and ``en/html`` will be generated in both directories. If you chose to build APIs, a subdirectory ``api/en/html`` will be generated. Please enter these directories respectively and execute the following commands:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-Use a web browser and navigate to http://localhost:8000, you could see the compiled Chinese/English documents page and the English APIs page. The following figure is an example of the built English documents home page. Note that due to the sphinx's original theme used in the example, the style of the page is not consistent with the official website, but this does not affect the developer's debugging.
+Use a web browser and navigate to http://localhost:8088, you could see the compiled ``v2`` 's and ``fluid`` 's Chinese/English documents page and English APIs page. The following figure is an example of the built ``v2`` 's English documents home page. Note that due to the sphinx's original theme used in the example, the style of the page is not consistent with the official website, but this does not affect the developer's debugging.
.. image:: src/doc_en.png
:align: center
From 9708b21f191b3ff606651dfaeb7cf65dfd250881 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Mon, 2 Apr 2018 10:51:31 +0800
Subject: [PATCH 07/62] Refine average model option 1. Add attr 'average' into
ParamAttr. 2. Make 'params_grads' optional for AverageModel. 3. Add option
 'average_mean' and 'average_variance' for batch_norm.
---
python/paddle/fluid/framework.py | 4 +++-
python/paddle/fluid/layers/nn.py | 12 +++++++++---
python/paddle/fluid/optimizer.py | 28 ++++++++++++----------------
python/paddle/fluid/param_attr.py | 9 ++++++---
4 files changed, 30 insertions(+), 23 deletions(-)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 3e78788f47..92c299a4b6 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1137,6 +1137,8 @@ class Parameter(Variable):
self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
+ self.average = kwargs.get('average', True)
+
def __str__(self):
return self.to_string(True)
@@ -1157,7 +1159,7 @@ class Parameter(Variable):
if with_details:
res_str = Variable.to_string(self, throw_on_error, True)
additional_attr = ("trainable", "optimize_attr", "regularizer",
- "gradient_clip_attr")
+ "gradient_clip_attr", "average")
for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name,
str(getattr(self, attr_name)))
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 0332556f62..3265ff733b 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1486,7 +1486,9 @@ def batch_norm(input,
in_place=False,
name=None,
moving_mean_name=None,
- moving_variance_name=None):
+ moving_variance_name=None,
+ average_mean=True,
+ average_variance=True):
"""
This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters.
@@ -1517,7 +1519,10 @@ def batch_norm(input,
mean = helper.create_parameter(
attr=ParamAttr(
- name=moving_mean_name, initializer=Constant(0.0), trainable=False),
+ name=moving_mean_name,
+ initializer=Constant(0.0),
+ trainable=False,
+ average=average_variance),
shape=param_shape,
dtype=input.dtype)
mean.stop_gradient = True
@@ -1526,7 +1531,8 @@ def batch_norm(input,
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
- trainable=False),
+ trainable=False,
+ average=average_mean),
shape=param_shape,
dtype=input.dtype)
variance.stop_gradient = True
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index d21320f705..560257a356 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
+import re
from collections import defaultdict
from paddle.fluid.framework import Program
import framework
@@ -818,8 +818,8 @@ class ModelAverage(Optimizer):
min_average_window, max_average_window and current update times.
Args:
- params_grads: A list of parameter-grad variable pairs.
average_window_rate: The rate of average window.
+ params_grads: A list of parameter-grad variable pairs.
min_average_window: The minimum size of average window.
max_average_window: The maximum size of average window.
@@ -840,8 +840,8 @@ class ModelAverage(Optimizer):
"""
def __init__(self,
- params_grads,
- average_window_rate,
+ average_window_rate=0.15,
+ params_grads=None,
min_average_window=10000,
max_average_window=10000,
**kwargs):
@@ -849,25 +849,21 @@ class ModelAverage(Optimizer):
self.average_window = average_window_rate
self.min_average_window = min_average_window
self.max_average_window = max_average_window
- self.params_grads = params_grads
- # append 'moving mean' and 'moving variance' to self.params_grads
- pattern = re.compile(r"batch_norm_\d+\.w_[1,2]")
+ self.params_grads = [] if params_grads is None else params_grads
+ params = {}
+ for param, grad in self.params_grads:
+ params[param.name] = (param, grad)
for param in framework.default_main_program().global_block(
).all_parameters():
- if pattern.match(param.name) is not None:
- self.params_grads.append((param, None))
- # create a tmp gradient variable to backup parameter value
- # for parameter whose grad is None
- for i, param_grad in enumerate(self.params_grads):
- param, grad = param_grad
- if grad is None:
+ if param.name not in params and param.average:
grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])),
dtype=param.dtype,
persistable=False,
- stop_gradient=stop_gradient)
- self.params_grads[i] = (param, grad)
+ stop_gradient=True)
+ params[param.name] = (param, grad)
+ self.params_grads = params.values()
for param, grad in self.params_grads:
self._append_average_accumulate_op(param)
diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py
index 255cd21043..74b968f8ee 100644
--- a/python/paddle/fluid/param_attr.py
+++ b/python/paddle/fluid/param_attr.py
@@ -28,13 +28,15 @@ class ParamAttr(object):
learning_rate=1.0,
regularizer=None,
trainable=True,
- gradient_clip=None):
+ gradient_clip=None,
+ average=True):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
self.gradient_clip = gradient_clip
+ self.average = average
def set_default_initializer(self, initializer):
if initializer is None:
@@ -80,7 +82,8 @@ class ParamAttr(object):
},
'regularizer': self.regularizer,
'trainable': self.trainable,
- 'gradient_clip_attr': self.gradient_clip
+ 'gradient_clip_attr': self.gradient_clip,
+ 'average': self.average
}
if with_initializer:
kwargs['initializer'] = self.initializer
@@ -90,7 +93,7 @@ class ParamAttr(object):
class WeightNormParamAttr(ParamAttr):
"""
Used for weight normalization. Any field in ParamAttr can also be set here.
- Besides, an extra field dim can be set to indicate the dimension except
+ Besides, an extra field dim can be set to indicate the dimension except
which to normalize.
"""
# List to record the parameters reparameterized by weight normalization.
From af242901232464d8a59d26cba9084ffe22562fdf Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 4 Apr 2018 15:05:46 +0800
Subject: [PATCH 08/62] Add 'buffer_size' api for open_files op
---
paddle/fluid/operators/reader/open_files_op.cc | 15 ++++++++++-----
python/paddle/fluid/layers/io.py | 12 ++++++++++--
2 files changed, 20 insertions(+), 7 deletions(-)
diff --git a/paddle/fluid/operators/reader/open_files_op.cc b/paddle/fluid/operators/reader/open_files_op.cc
index eacedeea88..db4e619e7b 100644
--- a/paddle/fluid/operators/reader/open_files_op.cc
+++ b/paddle/fluid/operators/reader/open_files_op.cc
@@ -38,8 +38,9 @@ class MultipleReader : public framework::ReaderBase {
};
MultipleReader(const std::vector& file_names,
- const std::vector& dims, size_t thread_num)
- : file_names_(file_names), dims_(dims) {
+ const std::vector& dims, size_t thread_num,
+ size_t buffer_size)
+ : file_names_(file_names), dims_(dims), buffer_size_(buffer_size) {
prefetchers_.resize(thread_num);
StartNewScheduler();
}
@@ -60,6 +61,7 @@ class MultipleReader : public framework::ReaderBase {
std::vector dims_;
std::thread scheduler_;
std::vector prefetchers_;
+ size_t buffer_size_;
framework::Channel* waiting_file_idx_;
framework::Channel* available_thread_idx_;
framework::Channel>* buffer_;
@@ -92,7 +94,7 @@ void MultipleReader::StartNewScheduler() {
waiting_file_idx_ = framework::MakeChannel(file_names_.size());
available_thread_idx_ = framework::MakeChannel(thread_num);
buffer_ =
- framework::MakeChannel>(thread_num);
+ framework::MakeChannel>(buffer_size_);
for (size_t i = 0; i < file_names_.size(); ++i) {
waiting_file_idx_->Send(&i);
@@ -197,11 +199,13 @@ class OpenFilesOp : public framework::OperatorBase {
const auto& file_names = Attr>("file_names");
PADDLE_ENFORCE(!file_names.empty(), "No file to be read!");
const size_t thread_num = Attr("thread_num");
+ const size_t buffer_size = Attr("buffer_size");
auto* out = scope.FindVar(Output("Out"))
->template GetMutable();
- out->Reset(new MultipleReader(
- file_names, RestoreShapes(shape_concat, ranks), thread_num));
+ out->Reset(new MultipleReader(file_names,
+ RestoreShapes(shape_concat, ranks),
+ thread_num, buffer_size));
}
};
@@ -212,6 +216,7 @@ class OpenFilesOpMaker : public FileReaderMakerBase {
AddAttr>("file_names", "Files to be read.");
AddAttr("thread_num", "The maximal concurrent prefetch thread number.")
.GreaterThan(0);
+ AddAttr("buffer_size", "The size of prefetch buffer.").GreaterThan(0);
AddComment(R"DOC(
OpenFiles Operator
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index bd7e9c30fe..da5b4853d3 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -287,7 +287,14 @@ def open_recordio_file(filename, shapes, lod_levels, dtypes):
startup_var)
-def open_files(filenames, thread_num, shapes, lod_levels, dtypes):
+def open_files(filenames,
+ shapes,
+ lod_levels,
+ dtypes,
+ thread_num,
+ buffer_size=None):
+ if buffer_size is None:
+ buffer_size = thread_num
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
ranks = []
@@ -308,7 +315,8 @@ def open_files(filenames, thread_num, shapes, lod_levels, dtypes):
'lod_levels': lod_levels,
'ranks': ranks,
'file_names': filenames,
- 'thread_num': thread_num
+ 'thread_num': thread_num,
+ 'buffer_size': buffer_size
})
startup_var.desc.set_dtypes(dtypes)
From 6dcfd97a9285161efa767516d466a084b6a45bed Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 4 Apr 2018 15:35:28 +0800
Subject: [PATCH 09/62] add docstring
---
python/paddle/fluid/layers/io.py | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index da5b4853d3..97ac01b775 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -293,8 +293,40 @@ def open_files(filenames,
dtypes,
thread_num,
buffer_size=None):
+ """
+ Open files
+
+ This layer takes a list of files to read from and returns a Reader Variable. Via the Reader Variable, we can get data from given files.
+
+ Args:
+ filenames(list): The list of file names.
+ shapes(list): List of tuples which declaring data shapes.
+ lod_levels(list): List of ints which declaring data lod_level.
+ dtypes(list): List of strs which declaring data type.
+ thread_num(int): The maximal concurrent prefetch thread number.
+ buffer_size(int): The size of prefetch buffer.
+
+ Returns:
+ Variable: A Reader Variable via which we can get file data.
+
+ Examples:
+ .. code-block:: python
+
+ reader = fluid.layers.open_files(filenames=['./data1.recordio',
+ './data2.recordio'],
+ shapes=[(3,224,224), (1)],
+ lod_levels=[0, 0],
+ dtypes=['float32', 'int64'],
+ thread_num=2,
+ buffer_size=2)
+
+ # Via the reader, we can use 'read_file' layer to get data:
+ image, label = fluid.layers.read_file(reader)
+ """
if buffer_size is None:
buffer_size = thread_num
+ if isinstance(filenames, basestring):
+ filenames = [filenames]
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
ranks = []
From 2e40660e7a81962a56d89bdd1e2a86d9f78cab35 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Wed, 4 Apr 2018 18:13:45 +0800
Subject: [PATCH 10/62] Fix some issues.
---
python/paddle/fluid/framework.py | 4 ++--
python/paddle/fluid/layers/nn.py | 20 +++++++++++---------
python/paddle/fluid/optimizer.py | 4 ++--
python/paddle/fluid/param_attr.py | 6 +++---
4 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 370a477932..6120d66c12 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -1155,7 +1155,7 @@ class Parameter(Variable):
self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
- self.average = kwargs.get('average', True)
+ self.do_model_average = kwargs.get('do_model_average', None)
def __str__(self):
return self.to_string(True)
@@ -1177,7 +1177,7 @@ class Parameter(Variable):
if with_details:
res_str = Variable.to_string(self, throw_on_error, True)
additional_attr = ("trainable", "optimize_attr", "regularizer",
- "gradient_clip_attr", "average")
+ "gradient_clip_attr", "do_model_average")
for attr_name in additional_attr:
res_str += "%s: %s\n" % (attr_name,
str(getattr(self, attr_name)))
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index e5ae10636d..37ce738275 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1489,8 +1489,7 @@ def batch_norm(input,
name=None,
moving_mean_name=None,
moving_variance_name=None,
- average_mean=True,
- average_variance=True):
+ do_model_average_for_mean_and_var=False):
"""
This function helps create an operator to implement
the BatchNorm layer using the configurations from the input parameters.
@@ -1519,12 +1518,15 @@ def batch_norm(input,
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
+ if do_model_average_for_mean_and_var:
+ do_model_average_for_mean_and_var = None
+
mean = helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
- average=average_variance),
+ do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=input.dtype)
mean.stop_gradient = True
@@ -1534,7 +1536,7 @@ def batch_norm(input,
name=moving_variance_name,
initializer=Constant(1.0),
trainable=False,
- average=average_mean),
+ do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=input.dtype)
variance.stop_gradient = True
@@ -3352,14 +3354,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
Here are some examples to explain it.
1. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
- is [6, 8], the reshape operator will transform x into a 2-D tensor with
+ is [6, 8], the reshape operator will transform x into a 2-D tensor with
shape [6, 8] and leaving x's data unchanged.
2. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
specified is [2, 3, -1, 2], the reshape operator will transform x into a
4-D tensor with shape [2, 3, 4, 2] and leaving x's data unchanged. In this
- case, one dimension of the target shape is set to -1, the value of this
- dimension is inferred from the total element number of x and remaining
+ case, one dimension of the target shape is set to -1, the value of this
+ dimension is inferred from the total element number of x and remaining
dimensions.
3. Given a 3-D tensor x with a shape [2, 4, 6], and the target shape
@@ -3593,7 +3595,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None):
def pad(x, paddings, pad_value=0., name=None):
"""
Pads a tensor with a constant value given by :attr:`pad_value`, and the
- padded width is specified by :attr:`paddings`.
+ padded width is specified by :attr:`paddings`.
Specifically, the number of values padded before the contents of :attr:`x`
in dimension :attr:`i` is indicated by :attr:`paddings[i]`, and the number
@@ -3621,7 +3623,7 @@ def pad(x, paddings, pad_value=0., name=None):
x (Variable): The input tensor variable.
paddings (list): A list of integers. Its elements specify the padded
width before and after for each dimension in turn.
- The length of :attr:paddings must be
+ The length of :attr:paddings must be
:math:`rank(x) \\times 2`.
pad_value (float): The constant value used to pad.
name(str|None): A name for this layer(optional). If set None, the layer
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 560257a356..1917b7d044 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -840,7 +840,7 @@ class ModelAverage(Optimizer):
"""
def __init__(self,
- average_window_rate=0.15,
+ average_window_rate,
params_grads=None,
min_average_window=10000,
max_average_window=10000,
@@ -856,7 +856,7 @@ class ModelAverage(Optimizer):
params[param.name] = (param, grad)
for param in framework.default_main_program().global_block(
).all_parameters():
- if param.name not in params and param.average:
+ if param.name not in params and param.do_model_average != False:
grad = param.block.create_var(
name=unique_name.generate(".".join([param.name, 'tmp'])),
dtype=param.dtype,
diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py
index 74b968f8ee..1c6970441b 100644
--- a/python/paddle/fluid/param_attr.py
+++ b/python/paddle/fluid/param_attr.py
@@ -29,14 +29,14 @@ class ParamAttr(object):
regularizer=None,
trainable=True,
gradient_clip=None,
- average=True):
+ do_model_average=None):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
self.gradient_clip = gradient_clip
- self.average = average
+ self.model_average = do_model_average
def set_default_initializer(self, initializer):
if initializer is None:
@@ -83,7 +83,7 @@ class ParamAttr(object):
'regularizer': self.regularizer,
'trainable': self.trainable,
'gradient_clip_attr': self.gradient_clip,
- 'average': self.average
+ 'model_average': self.model_average
}
if with_initializer:
kwargs['initializer'] = self.initializer
From 442c150333ce169b9e1221c0f2e61af8cfdc1e2b Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 4 Apr 2018 20:35:59 +0800
Subject: [PATCH 11/62] a draft of ThreadedReader
---
.../reader/create_threaded_reader_op.cc | 125 ++++++++++++++++++
1 file changed, 125 insertions(+)
create mode 100644 paddle/fluid/operators/reader/create_threaded_reader_op.cc
diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
new file mode 100644
index 0000000000..a4aebafa8b
--- /dev/null
+++ b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
@@ -0,0 +1,125 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/detail/safe_ref.h"
+#include "paddle/fluid/operators/reader/reader_op_registry.h"
+
+namespace paddle {
+namespace operators {
+namespace reader {
+
+class ThreadedReader : public framework::DecoratedReader {
+ public:
+ ThreadedReader(ReaderBase* reader, bool unsafe_mode)
+ : DecoratedReader(reader), unsafe_mode_(unsafe_mode) {}
+
+ void ReadNext(std::vector* out) override {
+ std::lock_guard lock(mutex_);
+ if (!unsafe_mode) {
+ if (!reader_->HasNext()) {
+ PADDLE_THROW("There is no next data!");
+ }
+ reader_->ReadNext(out);
+ } else {
+ auto& thread_buffer = thread_buffers_[std::this_thread::get_id()];
+ if (thread_buffer.empty()) {
+ PADDLE_THROW(
+ "thread_buffer is empty! HasNext() must be invoked before "
+ "ReadNext() in the same thread.");
+ }
+ *out = thread_buffer;
+ thread_buffer.clear();
+ }
+ }
+
+ bool HasNext() const override {
+ if (!unsafe_mode_) {
+ PADDLE_THROW(
+ "ThreadedReader::HasNext() is disabled when 'unsafe_mode' is false.");
+ }
+ std::thread::id thread_id = std::this_thread::get_id();
+ std::lock_guard lock(mutex_);
+ auto& thread_buffer = thread_buffers_[thread_id];
+ if (thread_buffer.empty() && reader_->HasNext()) {
+ reader_->ReadNext(&thread_buffer);
+ }
+ return !threda_buffer.empty();
+ }
+
+ void ReInit() override;
+
+ ~ThreadedReader() {
+ for (auto& p : thread_buffers_) {
+ if (!p.second.empty()) {
+ PADDLE_THROW(
+ "Find an unused data batch in ThreadedReader! Maybe one thread "
+ "invokes 'HasNext()' without subsequent 'ReadNext()'.");
+ }
+ }
+ }
+
+ private:
+ mutable std::mutex mutex_;
+ mutable std::unordered_map>
+ thread_buffers_;
+};
+
+class CreateThreadedReaderOp : public framework::OperatorBase {
+ public:
+ using framework::OperatorBase::OperatorBase;
+
+ private:
+ void RunImpl(const framework::Scope& scope,
+ const platform::Place& dev_place) const override {
+ auto* out = detail::Ref(scope.FindVar(Output("Out")))
+ .GetMutable();
+ if (out->Get() != nullptr) {
+ return;
+ }
+ const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader"))
+ ->Get();
+ bool unsafe_mode = Attr("unsafe_mode");
+ out->Reset(new ThreadedReader(underlying_reader.Get(), unsafe_mode));
+ }
+};
+
+class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
+ public:
+ CreateThreadedReaderOpMaker(OpProto* op_proto, OpAttrChecker* op_checker)
+ : DecoratedReaderMakerBase(op_proto, op_checker) {
+ AddAttr("unsafe_mode",
+ "When 'unsafe_mode' is false, invoking 'HasNext()' or "
+ "'ReInit()' is not allowed to avoid unexpected bugs in "
+ "multi-thread environment.")
+ .SetDefault(false);
+ AddComment(R"DOC(
+ CreateThreadedReader Operator
+
+ This operator creates a threaded reader. A threaded reader's
+ 'ReadNext()' can be invoked by several threads at the same
+ time.
+ When the attribute 'unsafe_mode' is false, the threaded reader's
+ 'HasNext()' and 'ReInit()' will be disabled to avoid unexpected
+ bugs in multi-thread environment. If you really need them, you
+ can enable them by setting 'unsafe_mode' true. In this case,
+ 'HasNext()' returning true only guarantees the safety of
+ invoking 'ReadNext()' in the same thread. Each thread must
+ invoke 'HasNext()' and 'ReadNext()' in pair.
+ )DOC")
+ }
+};
+
+} // namespace reader
+} // namespace operators
+} // namespace paddle
From 01c6618de904e1d49660486cd65f8810cc9665a3 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Sun, 8 Apr 2018 09:38:26 +0800
Subject: [PATCH 12/62] first wip commit
---
.../fluid/framework/details/send_op_handle.cc | 78 +++++++++++++++++++
.../fluid/framework/details/send_op_handle.h | 50 ++++++++++++
paddle/fluid/operators/detail/grpc_client.cc | 3 +-
3 files changed, 129 insertions(+), 2 deletions(-)
create mode 100644 paddle/fluid/framework/details/send_op_handle.cc
create mode 100644 paddle/fluid/framework/details/send_op_handle.h
diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
new file mode 100644
index 0000000000..bd2a0a9c29
--- /dev/null
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -0,0 +1,78 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/details/send_op_handle.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+SendOpHandle::SendOpHandle(const std::vector &local_scopes,
+ const std::vector &places,
+ const platform::NCCLContextMap &ctxs)
+ : local_scopes_(local_scopes), places_(places) {}
+
+void SendOpHandle::RunImpl() {
+ if (inputs_.size() == 1) {
+ return; // No need to all reduce when GPU count = 1;
+ } else {
+ // Wait input done
+ for (auto *in : inputs_) {
+ auto &p = static_cast(in)->place_;
+ in->generated_op_->Wait(dev_ctxes_[p]);
+ }
+
+ auto &var_name = static_cast(this->inputs_[0])->name_;
+ int dtype = -1;
+ size_t numel = 0;
+
+ std::vector> all_reduce_calls;
+
+ for (size_t i = 0; i < local_scopes_.size(); ++i) {
+ auto &p = places_[i];
+ auto *s = local_scopes_[i];
+ int dev_id = boost::get(p).device;
+
+ auto &lod_tensor = s->FindVar(var_name)->Get();
+ void *buffer = const_cast(lod_tensor.data());
+
+ if (dtype == -1) {
+ dtype = platform::ToNCCLDataType(lod_tensor.type());
+ }
+
+ if (numel == 0) {
+ numel = static_cast(lod_tensor.numel());
+ }
+
+ auto &nccl_ctx = nccl_ctxs_.at(dev_id);
+ auto stream = nccl_ctx.stream();
+ auto comm = nccl_ctx.comm_;
+ all_reduce_calls.emplace_back([=] {
+ PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
+ buffer, buffer, numel, static_cast(dtype), ncclSum,
+ comm, stream));
+ });
+ }
+
+ platform::NCCLGroupGuard guard;
+ for (auto &call : all_reduce_calls) {
+ call();
+ }
+ }
+}
+
+std::string NCCLAllReduceOpHandle::Name() const { return "nccl_all_reduce"; }
+} // namespace details
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/details/send_op_handle.h b/paddle/fluid/framework/details/send_op_handle.h
new file mode 100644
index 0000000000..515f1a10a8
--- /dev/null
+++ b/paddle/fluid/framework/details/send_op_handle.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+#include "paddle/fluid/framework/details/op_handle_base.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
+#include "paddle/fluid/platform/nccl_helper.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+struct SendOpHandle : public OpHandleBase {
+ const std::vector &local_scopes_;
+ const std::vector &places_;
+ const platform::NCCLContextMap &nccl_ctxs_;
+
+ SendOpHandle(const std::vector &local_scopes,
+ const std::vector &places,
+ const platform::NCCLContextMap &ctxs);
+
+ std::string Name() const override;
+
+ // Delay and buffer nccl_all_reduce together can significantly increase
+ // performance. Disable this feature by returning false.
+ bool IsMultiDeviceTransfer() override { return true; };
+
+ protected:
+ void RunImpl() override;
+};
+
+} // namespace details
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/operators/detail/grpc_client.cc b/paddle/fluid/operators/detail/grpc_client.cc
index ef987d07f0..3cf286575e 100644
--- a/paddle/fluid/operators/detail/grpc_client.cc
+++ b/paddle/fluid/operators/detail/grpc_client.cc
@@ -65,9 +65,8 @@ bool RPCClient::AsyncSendVariable(const std::string& ep,
}
void ProcGetResponse(const VarHandle& var_h,
- // const sendrecv::VariableMessage& ret_msg) {
const ::grpc::ByteBuffer& ret_msg) {
- framework::Variable* outvar = NULL;
+ framework::Variable* outvar = nullptr;
DeserializeFromByteBuffer(ret_msg, *var_h.ctx, var_h.scope, &outvar);
}
From 8fed780f14bf24954300ba37cebd2338ee7d199c Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Sun, 8 Apr 2018 11:26:06 +0800
Subject: [PATCH 13/62] Complete threaded reader
---
paddle/fluid/operators/reader/CMakeLists.txt | 1 +
.../operators/reader/create_threaded_reader_op.cc | 15 ++++++++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt
index 6fa0195b9a..845528860f 100644
--- a/paddle/fluid/operators/reader/CMakeLists.txt
+++ b/paddle/fluid/operators/reader/CMakeLists.txt
@@ -22,5 +22,6 @@ reader_library(create_batch_reader_op SRCS create_batch_reader_op.cc)
reader_library(create_recordio_file_reader_op SRCS create_recordio_file_reader_op.cc)
reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc)
reader_library(create_multi_pass_reader_op SRCS create_multi_pass_reader_op.cc)
+reader_library(create_threaded_reader_op SRCS create_threaded_reader_op.cc)
# Export local libraries to parent
set(READER_LIBRARY ${LOCAL_READER_LIBS} PARENT_SCOPE)
diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
index a4aebafa8b..489866ca80 100644
--- a/paddle/fluid/operators/reader/create_threaded_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
@@ -57,7 +57,15 @@ class ThreadedReader : public framework::DecoratedReader {
return !threda_buffer.empty();
}
- void ReInit() override;
+ void ReInit() override {
+ if (!unsafe_mode_) {
+ PADDLE_THROW(
+ "ThreadedReader::ReInit() is disabled when 'unsafe_mode' is false.");
+ }
+ VLOG(5) << "ThreadedReader::ReInit() is invoked! It might be buggy in "
+ "multi-thread environment.";
+ reader_->ReInit();
+ }
~ThreadedReader() {
for (auto& p : thread_buffers_) {
@@ -123,3 +131,8 @@ class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
} // namespace reader
} // namespace operators
} // namespace paddle
+
+namespace reader = paddle::operators::reader;
+REGISTER_FILE_READER_OPERATOR(create_threaded_reader,
+ reader::CreateThreadedReaderOp,
+ reader::CreateThreadedReaderOpMaker);
From 03ff0e58fe433496330801627e0ae2f15e21df20 Mon Sep 17 00:00:00 2001
From: JiayiFeng
Date: Sun, 8 Apr 2018 04:25:56 +0000
Subject: [PATCH 14/62] fix compile errors
---
paddle/fluid/operators/reader/create_threaded_reader_op.cc | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
index 489866ca80..565cbe4d9f 100644
--- a/paddle/fluid/operators/reader/create_threaded_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
@@ -26,7 +26,7 @@ class ThreadedReader : public framework::DecoratedReader {
void ReadNext(std::vector* out) override {
std::lock_guard lock(mutex_);
- if (!unsafe_mode) {
+ if (!unsafe_mode_) {
if (!reader_->HasNext()) {
PADDLE_THROW("There is no next data!");
}
@@ -54,7 +54,7 @@ class ThreadedReader : public framework::DecoratedReader {
if (thread_buffer.empty() && reader_->HasNext()) {
reader_->ReadNext(&thread_buffer);
}
- return !threda_buffer.empty();
+ return !thread_buffer.empty();
}
void ReInit() override {
@@ -78,6 +78,7 @@ class ThreadedReader : public framework::DecoratedReader {
}
private:
+ bool unsafe_mode_;
mutable std::mutex mutex_;
mutable std::unordered_map>
thread_buffers_;
@@ -124,7 +125,7 @@ class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
'HasNext()' returning true only guarantees the safety of
invoking 'ReadNext()' in the same thread. Each thread must
invoke 'HasNext()' and 'ReadNext()' in pair.
- )DOC")
+ )DOC");
}
};
From 49ab52d64d8aced5da6d4eedd34773baebae5546 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Sun, 8 Apr 2018 13:01:26 +0800
Subject: [PATCH 15/62] Modify MultipleReader
1. Remove MultipleReader's multi-thread support, since we now have
ThreadedReader.
2. Rename MultipleReader to MultiFileReader
---
.../reader/create_threaded_reader_op.cc | 2 +-
.../fluid/operators/reader/open_files_op.cc | 66 +++++++------------
2 files changed, 24 insertions(+), 44 deletions(-)
diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
index 565cbe4d9f..854381e0ee 100644
--- a/paddle/fluid/operators/reader/create_threaded_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
@@ -124,7 +124,7 @@ class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
can enable them by setting 'unsafe_mode' true. In this case,
'HasNext()' returning true only guarantees the safety of
invoking 'ReadNext()' in the same thread. Each thread must
- invoke 'HasNext()' and 'ReadNext()' in pair.
+ invoke 'HasNext()' and 'ReadNext()' in pairs.
)DOC");
}
};
diff --git a/paddle/fluid/operators/reader/open_files_op.cc b/paddle/fluid/operators/reader/open_files_op.cc
index db4e619e7b..45db94e780 100644
--- a/paddle/fluid/operators/reader/open_files_op.cc
+++ b/paddle/fluid/operators/reader/open_files_op.cc
@@ -19,27 +19,11 @@ namespace paddle {
namespace operators {
namespace reader {
-class MultipleReader : public framework::ReaderBase {
+class MultiFileReader : public framework::ReaderBase {
public:
- class ThreadBufferMap {
- public:
- std::vector& operator[](
- const std::thread::id& thread_id) {
- std::lock_guard lock(mutex_);
- return buffer_[thread_id];
- }
-
- void Clear() { buffer_.clear(); }
-
- private:
- std::mutex mutex_;
- std::unordered_map>
- buffer_;
- };
-
- MultipleReader(const std::vector& file_names,
- const std::vector& dims, size_t thread_num,
- size_t buffer_size)
+ MultiFileReader(const std::vector& file_names,
+ const std::vector& dims, size_t thread_num,
+ size_t buffer_size)
: file_names_(file_names), dims_(dims), buffer_size_(buffer_size) {
prefetchers_.resize(thread_num);
StartNewScheduler();
@@ -49,7 +33,7 @@ class MultipleReader : public framework::ReaderBase {
bool HasNext() const override;
void ReInit() override;
- ~MultipleReader() { EndScheduler(); }
+ ~MultiFileReader() { EndScheduler(); }
private:
void StartNewScheduler();
@@ -65,31 +49,27 @@ class MultipleReader : public framework::ReaderBase {
framework::Channel* waiting_file_idx_;
framework::Channel* available_thread_idx_;
framework::Channel>* buffer_;
- mutable ThreadBufferMap thread_buffer_map_;
};
-void MultipleReader::ReadNext(std::vector* out) {
+void MultiFileReader::ReadNext(std::vector* out) {
if (!HasNext()) {
PADDLE_THROW("There is no next data!");
}
- auto& thread_local_buffer = thread_buffer_map_[std::this_thread::get_id()];
- *out = thread_local_buffer;
- thread_local_buffer.clear();
+ buffer_->Receive(out);
}
-bool MultipleReader::HasNext() const {
- auto& thread_local_buffer = thread_buffer_map_[std::this_thread::get_id()];
- return thread_local_buffer.empty() ? buffer_->Receive(&thread_local_buffer)
- : true;
+bool MultiFileReader::HasNext() const {
+ while (!buffer_->IsClosed() && !buffer_->CanReceive()) {
+ }
+ return buffer_->CanReceive();
}
-void MultipleReader::ReInit() {
+void MultiFileReader::ReInit() {
EndScheduler();
- thread_buffer_map_.Clear();
StartNewScheduler();
}
-void MultipleReader::StartNewScheduler() {
+void MultiFileReader::StartNewScheduler() {
size_t thread_num = prefetchers_.size();
waiting_file_idx_ = framework::MakeChannel(file_names_.size());
available_thread_idx_ = framework::MakeChannel(thread_num);
@@ -107,7 +87,7 @@ void MultipleReader::StartNewScheduler() {
scheduler_ = std::thread([this] { ScheduleThreadFunc(); });
}
-void MultipleReader::EndScheduler() {
+void MultiFileReader::EndScheduler() {
available_thread_idx_->Close();
buffer_->Close();
waiting_file_idx_->Close();
@@ -119,8 +99,8 @@ void MultipleReader::EndScheduler() {
delete waiting_file_idx_;
}
-void MultipleReader::ScheduleThreadFunc() {
- VLOG(5) << "MultipleReader schedule thread starts.";
+void MultiFileReader::ScheduleThreadFunc() {
+ VLOG(5) << "MultiFileReader schedule thread starts.";
size_t completed_thread_num = 0;
size_t thread_idx;
while (available_thread_idx_->Receive(&thread_idx)) {
@@ -152,11 +132,11 @@ void MultipleReader::ScheduleThreadFunc() {
p.join();
}
}
- VLOG(5) << "MultipleReader schedule thread terminates.";
+ VLOG(5) << "MultiFileReader schedule thread terminates.";
}
-void MultipleReader::PrefetchThreadFunc(std::string file_name,
- size_t thread_idx) {
+void MultiFileReader::PrefetchThreadFunc(std::string file_name,
+ size_t thread_idx) {
VLOG(5) << "The prefetch thread of file '" << file_name << "' starts.";
std::unique_ptr reader =
CreateReaderByFileName(file_name, dims_);
@@ -203,9 +183,9 @@ class OpenFilesOp : public framework::OperatorBase {
auto* out = scope.FindVar(Output("Out"))
->template GetMutable();
- out->Reset(new MultipleReader(file_names,
- RestoreShapes(shape_concat, ranks),
- thread_num, buffer_size));
+ out->Reset(new MultiFileReader(file_names,
+ RestoreShapes(shape_concat, ranks),
+ thread_num, buffer_size));
}
};
@@ -221,7 +201,7 @@ class OpenFilesOpMaker : public FileReaderMakerBase {
AddComment(R"DOC(
OpenFiles Operator
- An OpenFilesOp creates a MultipleReader, which is able to
+ An OpenFilesOp creates a MultiFileReader, which is able to
read data multi-threaded from multiple files.
)DOC");
}
From fca9e8847d5017601251ee8813e7af513b2603ed Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Sun, 8 Apr 2018 16:10:14 +0800
Subject: [PATCH 16/62] Update Readers Python API
1. Combine 'open_files', 'multi_pass_reader' and 'threaded_reader'
together to make the new 'open_files' interface.
2. Add some docstrings.
3. Simplify interface names of 'create_XXX_reader', e.g., rename
'create_double_buffer_reader' to 'double_buffer'.
---
python/paddle/fluid/layers/io.py | 109 ++++++++++++++++++++++++-------
1 file changed, 85 insertions(+), 24 deletions(-)
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 7413e69234..fc8809ce15 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -22,7 +22,7 @@ from ..executor import global_scope
__all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file',
'open_files', 'read_file', 'create_shuffle_reader',
- 'create_double_buffer_reader', 'create_multi_pass_reader'
+ 'create_double_buffer_reader'
]
@@ -283,7 +283,43 @@ def _copy_reader_create_op_(block, op):
return new_op
-def open_recordio_file(filename, shapes, lod_levels, dtypes):
+def open_recordio_file(filename,
+ shapes,
+ lod_levels,
+ dtypes,
+ pass_num=1,
+ for_parallel=False):
+ """
+ Open a RecordIO file
+
+ This layer takes a RecordIO file to read from and returns a Reader Variable.
+ Via the Reader Variable, we can get data from the given RecordIO file.
+
+ Args:
+ filename(str): The RecordIO file's name.
+ shapes(list): List of tuples which declaring data shapes.
+ lod_levels(list): List of ints which declaring data lod_level.
+ dtypes(list): List of strs which declaring data type.
+ pass_num(int): Number of passes to run. After completing the
+ given number of passes, 'has_next()' will return False.
+ for_parallel(Bool): Set it as True if you are going to run
+ subsequent operators in parallel.
+
+ Returns:
+ Variable: A Reader Variable via which we can get RecordIO file data.
+
+ Examples:
+ .. code-block:: python
+
+ reader = fluid.layers.io.open_recordio_file(
+ filename='./data.recordio',
+ shapes=[(3,224,224), (1)],
+ lod_levels=[0, 0],
+ dtypes=['float32', 'int64'])
+
+ # Via the reader, we can use 'read_file' layer to get data:
+ image, label = fluid.layers.read_file(reader)
+ """
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
shape_concat = []
ranks = []
@@ -310,6 +346,13 @@ def open_recordio_file(filename, shapes, lod_levels, dtypes):
startup_var.persistable = True
main_prog_var = _copy_reader_var_(default_main_program().current_block(),
startup_var)
+
+ if pass_num > 1:
+ main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)
+
+ if for_parallel:
+ main_prog_var = for_parallel(reader=main_prog_var)
+
return monkey_patch_reader_methods(main_prog_var)
@@ -318,11 +361,15 @@ def open_files(filenames,
lod_levels,
dtypes,
thread_num,
- buffer_size=None):
+ buffer_size=None,
+ pass_num=1,
+ for_parallel=False):
"""
Open files
- This layer takes a list of files to read from and returns a Reader Variable. Via the Reader Variable, we can get data from given files.
+ This layer takes a list of files to read from and returns a Reader Variable.
+ Via the Reader Variable, we can get data from given files. All files must
+ have name suffixs to indicate their formats, e.g., '*.recordio'.
Args:
filenames(list): The list of file names.
@@ -331,6 +378,10 @@ def open_files(filenames,
dtypes(list): List of strs which declaring data type.
thread_num(int): The maximal concurrent prefetch thread number.
buffer_size(int): The size of prefetch buffer.
+ pass_num(int): Number of passes to run. After completing the
+ given number of passes, 'has_next()' will return False.
+ for_parallel(Bool): Set it as True if you are going to run
+ subsequent operators in parallel.
Returns:
Variable: A Reader Variable via which we can get file data.
@@ -338,16 +389,16 @@ def open_files(filenames,
Examples:
.. code-block:: python
- reader = fluid.layers.open_files(filenames=['./data1.recordio',
+ reader = fluid.layers.io.open_files(filenames=['./data1.recordio',
'./data2.recordio'],
- shapes=[(3,224,224), (1)],
- lod_levels=[0, 0],
- dtypes=['float32', 'int64'],
- thread_num=2,
- buffer_size=2)
+ shapes=[(3,224,224), (1)],
+ lod_levels=[0, 0],
+ dtypes=['float32', 'int64'],
+ thread_num=2,
+ buffer_size=2)
# Via the reader, we can use 'read_file' layer to get data:
- image, label = fluid.layers.read_file(reader)
+ image, label = fluid.layers.io.read_file(reader)
"""
if buffer_size is None:
buffer_size = thread_num
@@ -361,13 +412,12 @@ def open_files(filenames,
shape_concat.extend(shape)
ranks.append(len(shape))
- var_name = unique_name('multiple_reader')
-
+ multi_file_reader_name = unique_name('multi_file_reader')
startup_blk = default_startup_program().current_block()
- startup_var = startup_blk.create_var(name=var_name)
+ startup_reader = startup_blk.create_var(name=multi_file_reader_name)
startup_blk.append_op(
type='open_files',
- outputs={'Out': [startup_var]},
+ outputs={'Out': [startup_reader]},
attrs={
'shape_concat': shape_concat,
'lod_levels': lod_levels,
@@ -377,14 +427,21 @@ def open_files(filenames,
'buffer_size': buffer_size
})
- startup_var.desc.set_dtypes(dtypes)
- startup_var.persistable = True
- main_prog_var = _copy_reader_var_(default_main_program().current_block(),
- startup_var)
- return monkey_patch_reader_methods(main_prog_var)
+ startup_reader.desc.set_dtypes(dtypes)
+ startup_reader.persistable = True
+ main_prog_reader = _copy_reader_var_(default_main_program().current_block(),
+ startup_reader)
+ if pass_num > 1:
+ main_prog_reader = multi_pass(
+ reader=main_prog_reader, pass_num=pass_num)
+ if for_parallel:
+ main_prog_reader = for_parallel(reader=main_prog_reader)
-def __create_decorated_reader__(op_type, reader, attrs):
+ return monkey_patch_reader_methods(main_prog_reader)
+
+
+def __create_decorated_reader__(op_type, reader, attrs={}):
var_name = unique_name(op_type)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=var_name)
@@ -400,12 +457,12 @@ def __create_decorated_reader__(op_type, reader, attrs):
return monkey_patch_reader_methods(main_prog_var)
-def create_shuffle_reader(reader, buffer_size):
+def shuffle(reader, buffer_size):
return __create_decorated_reader__('create_shuffle_reader', reader,
{'buffer_size': int(buffer_size)})
-def create_double_buffer_reader(reader, place=None):
+def double_buffer(reader, place=None):
attrs = dict()
if place is not None:
attrs['place'] = str(place).upper()
@@ -413,11 +470,15 @@ def create_double_buffer_reader(reader, place=None):
attrs)
-def create_multi_pass_reader(reader, pass_num):
+def multi_pass(reader, pass_num):
return __create_decorated_reader__('create_multi_pass_reader', reader,
{'pass_num': int(pass_num)})
+def for_parallel(reader):
+ return __create_decorated_reader__('create_threaded_reader', reader)
+
+
def read_file(file_obj):
helper = LayerHelper('read_file')
out = [
From 5ad2486905214e658a0ef8f54e9b447c1fec03b2 Mon Sep 17 00:00:00 2001
From: JiayiFeng
Date: Sun, 8 Apr 2018 09:15:58 +0000
Subject: [PATCH 17/62] fix errors
---
python/paddle/fluid/layers/io.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index fc8809ce15..dbba1a46eb 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -21,8 +21,7 @@ from ..executor import global_scope
__all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file',
- 'open_files', 'read_file', 'create_shuffle_reader',
- 'create_double_buffer_reader'
+ 'open_files', 'read_file', 'shuffle', 'double_buffer'
]
From baea2cf17892f2cba47c8bde29bccd7488c2ee52 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Sun, 8 Apr 2018 18:35:49 +0800
Subject: [PATCH 18/62] wip
---
paddle/fluid/framework/details/CMakeLists.txt | 1 +
.../details/multi_devices_graph_builder.cc | 59 +++++++++++++----
.../details/multi_devices_graph_builder.h | 14 ++++-
.../fluid/framework/details/send_op_handle.cc | 63 ++++---------------
.../fluid/framework/details/send_op_handle.h | 15 ++---
python/paddle/fluid/framework.py | 7 +++
6 files changed, 87 insertions(+), 72 deletions(-)
diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt
index 89b5c6847f..caaf418076 100644
--- a/paddle/fluid/framework/details/CMakeLists.txt
+++ b/paddle/fluid/framework/details/CMakeLists.txt
@@ -5,6 +5,7 @@ cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod
nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda)
cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
+cc_library(send_op_handle SRCS send_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base)
cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph)
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index 128a5344fb..bea9489bbd 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h"
+#include "paddle/fluid/framework/details/send_op_handle.h"
#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA
@@ -34,26 +35,46 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
const std::string &loss_var_name,
const std::unordered_set ¶ms,
const std::vector &local_scopes,
- platform::NCCLContextMap *nccl_ctxs)
+ platform::NCCLContextMap *nccl_ctxs, bool distributed)
: loss_var_name_(loss_var_name),
places_(places),
local_scopes_(local_scopes),
+ distributed_(distributed),
nccl_ctxs_(nccl_ctxs) {
#else
MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
const std::vector &places,
const std::string &loss_var_name,
const std::unordered_set ¶ms,
- const std::vector &local_scopes)
+ const std::vector &local_scopes, bool distributed)
: loss_var_name_(loss_var_name),
places_(places),
- local_scopes_(local_scopes) {
+ local_scopes_(local_scopes),
+ distributed_(distributed) {
#endif
for (auto &p : params) {
grad_names_.insert(GradVarName(p));
}
}
+void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, OpDesc *op,
+ const platform::Place &p,
+ const size_t &i) const {
+ auto *op_handle = result->ops_.back().get();
+
+ auto var_names = op->InputArgumentNames();
+
+ for (auto &each_var_name : var_names) {
+ VarHandle *var = CreateOrGetLatestVarHandle(result, each_var_name, p, i);
+ op_handle->AddInput(var);
+ }
+ var_names = op->OutputArgumentNames();
+
+ for (auto &each_var_name : var_names) {
+ CreateOpOutput(result, op_handle, each_var_name, p, i);
+ }
+}
+
std::unique_ptr MultiDevSSAGraphBuilder::Build(
const ProgramDesc &program) const {
auto graph = new SSAGraph();
@@ -72,6 +93,17 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build(
}
}
+ // append send op if program is distributed trainer main program.
+ // always use the first device
+ if (is_forwarding && distributed_ && op->Type() == "send") {
+ auto &p = places_[0];
+ auto *s = local_scopes_[0];
+ size_t i = 0;
+ result.ops_.emplace_back(new SendOpHandle(*op, s, p));
+ CreateOpHandleIOs(&result, op, p, i);
+ continue;
+ }
+
for (size_t i = 0; i < places_.size(); ++i) {
auto &p = places_[i];
auto *s = local_scopes_[i];
@@ -81,18 +113,19 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build(
op_handle->dev_ctxes_[p] = const_cast(
platform::DeviceContextPool::Instance().Get(p));
- auto var_names = op->InputArgumentNames();
+ CreateOpHandleIOs(&result, op, p, i);
+ // auto var_names = op->InputArgumentNames();
- for (auto &each_var_name : var_names) {
- VarHandle *var =
- CreateOrGetLatestVarHandle(&result, each_var_name, p, i);
- op_handle->AddInput(var);
- }
- var_names = op->OutputArgumentNames();
+ // for (auto &each_var_name : var_names) {
+ // VarHandle *var =
+ // CreateOrGetLatestVarHandle(&result, each_var_name, p, i);
+ // op_handle->AddInput(var);
+ // }
+ auto var_names = op->OutputArgumentNames();
- for (auto &each_var_name : var_names) {
- CreateOpOutput(&result, op_handle, each_var_name, p, i);
- }
+ // for (auto &each_var_name : var_names) {
+ // CreateOpOutput(&result, op_handle, each_var_name, p, i);
+ // }
if (is_forwarding) {
if (var_names.size() == 1 && var_names[0] == loss_var_name_) {
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h
index d3c8e582cf..004d6d50ab 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -14,6 +14,9 @@
#pragma once
+#include
+#include
+
#include "paddle/fluid/framework/details/ssa_graph_builder.h"
namespace paddle {
@@ -31,21 +34,28 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
const std::string &loss_var_name,
const std::unordered_set ¶ms,
const std::vector &local_scopes,
- platform::NCCLContextMap *nccl_ctxs);
+ platform::NCCLContextMap *nccl_ctxs,
+ bool distributed = false);
#else
MultiDevSSAGraphBuilder(const std::vector &places,
const std::string &loss_var_name,
const std::unordered_set ¶ms,
- const std::vector &local_scopes);
+ const std::vector &local_scopes,
+ bool distributed = false);
#endif
std::unique_ptr Build(const ProgramDesc &program) const override;
+ private:
+ void CreateOpHandleIOs(SSAGraph *result, OpDesc *op, const platform::Place &p,
+ const size_t &i) const;
+
private:
std::string loss_var_name_;
const std::vector &places_;
const std::vector &local_scopes_;
std::unordered_set grad_names_;
+ bool distributed_;
#ifdef PADDLE_WITH_CUDA
platform::NCCLContextMap *nccl_ctxs_;
diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
index bd2a0a9c29..ae5637b804 100644
--- a/paddle/fluid/framework/details/send_op_handle.cc
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -18,61 +18,24 @@ namespace paddle {
namespace framework {
namespace details {
-SendOpHandle::SendOpHandle(const std::vector &local_scopes,
- const std::vector &places,
- const platform::NCCLContextMap &ctxs)
- : local_scopes_(local_scopes), places_(places) {}
+SendOpHandle::SendOpHandle(const framework::OpDesc &op_desc,
+ const Scope *local_scope,
+ const platform::Place &place)
+ : op_(framework::OpRegistry::CreateOp(op_desc)),
+ local_scope_(local_scope),
+ place_(place) {}
void SendOpHandle::RunImpl() {
- if (inputs_.size() == 1) {
- return; // No need to all reduce when GPU count = 1;
- } else {
- // Wait input done
- for (auto *in : inputs_) {
- auto &p = static_cast(in)->place_;
- in->generated_op_->Wait(dev_ctxes_[p]);
- }
-
- auto &var_name = static_cast(this->inputs_[0])->name_;
- int dtype = -1;
- size_t numel = 0;
-
- std::vector> all_reduce_calls;
-
- for (size_t i = 0; i < local_scopes_.size(); ++i) {
- auto &p = places_[i];
- auto *s = local_scopes_[i];
- int dev_id = boost::get(p).device;
-
- auto &lod_tensor = s->FindVar(var_name)->Get();
- void *buffer = const_cast(lod_tensor.data());
-
- if (dtype == -1) {
- dtype = platform::ToNCCLDataType(lod_tensor.type());
- }
-
- if (numel == 0) {
- numel = static_cast(lod_tensor.numel());
- }
-
- auto &nccl_ctx = nccl_ctxs_.at(dev_id);
- auto stream = nccl_ctx.stream();
- auto comm = nccl_ctx.comm_;
- all_reduce_calls.emplace_back([=] {
- PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
- buffer, buffer, numel, static_cast(dtype), ncclSum,
- comm, stream));
- });
- }
-
- platform::NCCLGroupGuard guard;
- for (auto &call : all_reduce_calls) {
- call();
- }
+ // Wait input done
+ for (auto *in : inputs_) {
+ auto &p = static_cast(in)->place_;
+ in->generated_op_->Wait(dev_ctxes_[p]);
}
+
+ op_->Run(*local_scope_, place_);
}
-std::string NCCLAllReduceOpHandle::Name() const { return "nccl_all_reduce"; }
+std::string SendOpHandle::Name() const { return "send"; }
} // namespace details
} // namespace framework
} // namespace paddle
diff --git a/paddle/fluid/framework/details/send_op_handle.h b/paddle/fluid/framework/details/send_op_handle.h
index 515f1a10a8..e7857c1f23 100644
--- a/paddle/fluid/framework/details/send_op_handle.h
+++ b/paddle/fluid/framework/details/send_op_handle.h
@@ -19,6 +19,8 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/nccl_helper.h"
@@ -27,19 +29,18 @@ namespace framework {
namespace details {
struct SendOpHandle : public OpHandleBase {
- const std::vector &local_scopes_;
- const std::vector &places_;
- const platform::NCCLContextMap &nccl_ctxs_;
+ std::unique_ptr op_;
+ const Scope* local_scope_;
+ const platform::Place& place_;
- SendOpHandle(const std::vector &local_scopes,
- const std::vector &places,
- const platform::NCCLContextMap &ctxs);
+ SendOpHandle(const framework::OpDesc& op_desc, const Scope* local_scope,
+ const platform::Place& place);
std::string Name() const override;
// Delay and buffer nccl_all_reduce together can significantly increase
// performance. Disable this feature by returning false.
- bool IsMultiDeviceTransfer() override { return true; };
+ bool IsMultiDeviceTransfer() override { return false; };
protected:
void RunImpl() override;
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 39d4017861..8bd9161fcb 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -951,6 +951,13 @@ class Block(object):
if var.type == core.VarDesc.VarType.STEP_SCOPES:
ret_var = self.create_var(
name=var.name, persistable=var.persistable, type=var.type)
+ elif var.type == core.VarDesc.VarType.SELECTED_ROWS:
+ ret_var = self.create_var(
+ name=var.name,
+ shape=var.shape,
+ dtype=var.dtype,
+ type=var.type,
+ persistable=True)
else:
ret_var = self.create_var(
name=var.name,
From 3f90a583b4e5f8a3534b03fb9ed83280ac2d69e4 Mon Sep 17 00:00:00 2001
From: JiayiFeng
Date: Mon, 9 Apr 2018 04:35:25 +0000
Subject: [PATCH 19/62] update unittest
---
python/paddle/fluid/tests/unittests/test_multi_pass_reader.py | 2 +-
python/paddle/fluid/tests/unittests/test_recordio_reader.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
index 0b7a290759..c8a8afbea6 100644
--- a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
+++ b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py
@@ -44,7 +44,7 @@ class TestMultipleReader(unittest.TestCase):
shapes=[(-1, 784), (-1, 1)],
lod_levels=[0, 0],
dtypes=['float32', 'int64'])
- data_file = fluid.layers.create_multi_pass_reader(
+ data_file = fluid.layers.io.multi_pass(
reader=data_file, pass_num=self.pass_num)
img, label = fluid.layers.read_file(data_file)
diff --git a/python/paddle/fluid/tests/unittests/test_recordio_reader.py b/python/paddle/fluid/tests/unittests/test_recordio_reader.py
index 24a0074d9b..096d99a3f3 100644
--- a/python/paddle/fluid/tests/unittests/test_recordio_reader.py
+++ b/python/paddle/fluid/tests/unittests/test_recordio_reader.py
@@ -74,8 +74,8 @@ class TestRecordIO(unittest.TestCase):
self.assertLess(avg_loss_np[-1], avg_loss_np[0])
def test_shuffle_reader(self):
- self.test_main(decorator_callback=lambda reader: fluid.layers.create_shuffle_reader(reader, buffer_size=200))
+ self.test_main(decorator_callback=lambda reader: fluid.layers.io.shuffle(reader, buffer_size=200))
def test_double_buffer_reader(self):
- self.test_main(decorator_callback=lambda reader: fluid.layers.create_double_buffer_reader(reader,
+ self.test_main(decorator_callback=lambda reader: fluid.layers.io.double_buffer(reader,
place='cuda:0' if fluid.core.is_compiled_with_cuda() else 'cpu'))
From 5416bac5d84b1d846744481505749df0a87db133 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Mon, 9 Apr 2018 15:47:46 +0800
Subject: [PATCH 20/62] Make shared decorated readers' creator be only in
main_program
---
.../reader/create_double_buffer_reader_op.cc | 9 ++++--
python/paddle/fluid/layers/io.py | 30 ++++++++++++++-----
2 files changed, 28 insertions(+), 11 deletions(-)
diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
index ed868786ab..d9f799f14d 100644
--- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
@@ -109,7 +109,9 @@ class CreateDoubleBufferReaderOp : public framework::OperatorBase {
auto place_str = Attr("place");
platform::Place place;
- if (place_str == "CPU") {
+ if (place_str == "AUTO") {
+ place = dev_place;
+ } else if (place_str == "CPU") {
place = platform::CPUPlace();
} else {
std::istringstream sin(place_str);
@@ -140,8 +142,9 @@ class CreateDoubleBufferReaderOpMaker : public DecoratedReaderMakerBase {
enum_range.insert(string::Sprintf("CUDA:%d", i));
}
enum_range.insert("CPU");
- AddAttr("place", "The double buffer place, default is CPU")
- .SetDefault("CPU")
+ enum_range.insert("AUTO");
+ AddAttr("place", "The double buffer place")
+ .SetDefault("AUTO")
.InEnum({enum_range});
}
};
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index dbba1a46eb..4901521db5 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -440,7 +440,7 @@ def open_files(filenames,
return monkey_patch_reader_methods(main_prog_reader)
-def __create_decorated_reader__(op_type, reader, attrs={}):
+def __create_unshared_decorated_reader__(op_type, reader, attrs={}):
var_name = unique_name(op_type)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=var_name)
@@ -456,26 +456,40 @@ def __create_decorated_reader__(op_type, reader, attrs={}):
return monkey_patch_reader_methods(main_prog_var)
+def __create_shared_decorated_reader__(op_type, reader, attrs={}):
+ new_reader_name = unique_name(op_type)
+ main_blk = default_main_program().current_block()
+ new_reader = main_blk.create_var(name=new_reader_name)
+ main_blk.append_op(
+ type=op_type,
+ inputs={'UnderlyingReader': reader},
+ outputs={'Out': [new_reader]},
+ attrs=attrs)
+ new_reader.persistable = True
+ new_reader.stop_gradient = True
+ return monkey_patch_reader_methods(new_reader)
+
+
def shuffle(reader, buffer_size):
- return __create_decorated_reader__('create_shuffle_reader', reader,
- {'buffer_size': int(buffer_size)})
+ return __create_unshared_decorated_reader__(
+ 'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})
def double_buffer(reader, place=None):
attrs = dict()
if place is not None:
attrs['place'] = str(place).upper()
- return __create_decorated_reader__('create_double_buffer_reader', reader,
- attrs)
+ return __create_unshared_decorated_reader__('create_double_buffer_reader',
+ reader, attrs)
def multi_pass(reader, pass_num):
- return __create_decorated_reader__('create_multi_pass_reader', reader,
- {'pass_num': int(pass_num)})
+ return __create_shared_decorated_reader__(
+ 'create_multi_pass_reader', reader, {'pass_num': int(pass_num)})
def for_parallel(reader):
- return __create_decorated_reader__('create_threaded_reader', reader)
+ return __create_shared_decorated_reader__('create_threaded_reader', reader)
def read_file(file_obj):
From 0586b9e5c95a7368fc944da96fc2bfac6796b07e Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Mon, 9 Apr 2018 15:49:15 +0800
Subject: [PATCH 21/62] Add *Initializer to fluid api docs
---
python/paddle/fluid/initializer.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 927f1e625a..11015b6127 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -17,8 +17,7 @@ import numpy as np
import contextlib
__all__ = [
- 'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu',
- 'init_on_cpu'
+ 'ConstantInitializer', 'UniformInitializer', 'NormalInitializer', 'XavierInitializer', 'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu','init_on_cpu'
]
_force_init_on_cpu_ = False
From 9bec0d26dbbe31c057426c1f88af64dd8c907e1e Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Mon, 9 Apr 2018 16:50:10 +0800
Subject: [PATCH 22/62] Adjust some contents
---
doc/v2/dev/write_docs_cn.rst | 8 ++++----
doc/v2/dev/write_docs_en.rst | 6 +++---
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index 887d92942e..4231f2bb5c 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -81,13 +81,13 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
注:上述命令把当前目录(源码根目录)映射为 container 里的 :code:`/paddle` 目录。
-编译完成后,进入 ``paddle/build/doc/v2`` 目录,该目录下生成了 ``cn/html/`` 、 ``en/html`` 以及 ``api/en/html`` 共三个子目录,分别进入这些目录下,执行以下命令:
+编译完成后,会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 、 ``api/en/html`` 共三个子目录,分别进入这些目录下,执行以下命令:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-在浏览器中输入http://localhost:8088就可以看到编译生成的中/英文的文档页面和英文的API页面。
+在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面。
如果不想使用Docker,也可以使用以下命令直接构建PaddlePaddle文档,即
@@ -107,13 +107,13 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
其中$processors代表启动和CPU核一样多的进程来并行编译,可以根据本机的CPU核数设置相应的值。
-编译完成后,会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,如果选择构建文档则会在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会在这两个目录下分别生成 ``api/en/html`` 目录,分别进入这些子目录下,执行以下命令:
+编译完成后,同样会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,如果选择构建文档则会在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会在这两个目录下分别生成 ``api/en/html`` 目录,分别进入这些子目录下,执行以下命令:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面,下图为生成的 ``v2`` 英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
+在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面。下图为生成的 ``v2`` 英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
.. image:: src/doc_en.png
:align: center
diff --git a/doc/v2/dev/write_docs_en.rst b/doc/v2/dev/write_docs_en.rst
index 435bbdb60f..6105455e20 100644
--- a/doc/v2/dev/write_docs_en.rst
+++ b/doc/v2/dev/write_docs_en.rst
@@ -84,13 +84,13 @@ Build PaddlePaddle's documentation with Docker,you need to install Docker firs
Note: The above commands maps the current directory (source root directory) to the :code:`/paddle` directory in the container.
-After compiling, you could enter the ``paddle/build/doc/v2`` directory, where three subdirectories ``cn/html/``, ``en/html`` and ``api/en/html`` are generated. Please enter these directories respectively and execute the following commands:
+After compiling, there should be two generated directories: ``doc/v2`` and ``doc/fluid``, where three subdirectories ``cn/html/``, ``en/html`` and ``api/en/html`` are generated. Please enter these directories respectively and execute the following commands:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-Use a web browser and navigate to http://localhost:8000, you could see the compiled Chinese/English documents page and the English APIs page.
+Use a web browser and navigate to http://localhost:8088, and you can see the compiled ``v2`` and ``fluid`` versions of the Chinese/English documents page and the English APIs page.
If you do not wish to use Docker, you can also use the following commands to directly build the PaddlePaddle documentation.
@@ -111,7 +111,7 @@ If you do not wish to use Docker, you can also use the following commands to dir
$processors indicates that as many processes as the CPU cores are started to compile in parallel. It should be set according to the number of CPU cores of your machine.
-After the compilation is complete, there should be two generated directories: ``doc/v2`` and ``doc/fluid`` . If you chose to build documents, two subdirectories ``cn/html/`` and ``en/html`` will be generated in both two directories. If you chose to build APIs,a subdirectory ``api/en/html`` will be generated. Please enter these directories respectively and execute the following commands:
+After compiling, there should also be two generated directories: ``doc/v2`` and ``doc/fluid`` . If you chose to build documents, two subdirectories ``cn/html/`` and ``en/html`` will be generated in both directories. If you chose to build APIs, a subdirectory ``api/en/html`` will be generated. Please enter these directories respectively and execute the following commands:
.. code-block:: bash
From cd41dca2ab9f5d98d6817147df2e71af152150f5 Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Mon, 9 Apr 2018 18:22:57 +0800
Subject: [PATCH 23/62] Adjust
---
python/paddle/fluid/initializer.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index 11015b6127..ad15c32a35 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -17,7 +17,8 @@ import numpy as np
import contextlib
__all__ = [
- 'ConstantInitializer', 'UniformInitializer', 'NormalInitializer', 'XavierInitializer', 'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu','init_on_cpu'
+ 'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu',
+ 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer', 'NormalInitializer', 'XavierInitializer'
]
_force_init_on_cpu_ = False
From 9fe938cb2aefcbced1e60fa459c943fa2ea245e6 Mon Sep 17 00:00:00 2001
From: jshower
Date: Tue, 10 Apr 2018 03:48:26 +0000
Subject: [PATCH 24/62] Changing network configuration, avoid nan
---
.../fluid/tests/book/test_label_semantic_roles.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index c0a6df831a..5fc64ea958 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -77,7 +77,7 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
emb_layers.append(mark_embedding)
hidden_0_layers = [
- fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
+ fluid.layers.fc(input=emb, size=hidden_dim, act='tanh') for emb in emb_layers
]
hidden_0 = fluid.layers.sums(input=hidden_0_layers)
@@ -94,8 +94,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
for i in range(1, depth):
mix_hidden = fluid.layers.sums(input=[
- fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
- fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
+ fluid.layers.fc(input=input_tmp[0], size=hidden_dim, act='tanh'),
+ fluid.layers.fc(input=input_tmp[1], size=hidden_dim, act='tanh')
])
lstm = fluid.layers.dynamic_lstm(
@@ -109,8 +109,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
input_tmp = [mix_hidden, lstm]
feature_out = fluid.layers.sums(input=[
- fluid.layers.fc(input=input_tmp[0], size=label_dict_len),
- fluid.layers.fc(input=input_tmp[1], size=label_dict_len)
+ fluid.layers.fc(input=input_tmp[0], size=label_dict_len, act='tanh'),
+ fluid.layers.fc(input=input_tmp[1], size=label_dict_len, act='tanh')
])
return feature_out
@@ -171,7 +171,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
# check other optimizers and check why out will be NAN
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
- learning_rate=0.0001,
+ learning_rate=0.01,
decay_steps=100000,
decay_rate=0.5,
staircase=True))
From ee178d5aebaa48f3434ec577ab1b450f4e3d7eab Mon Sep 17 00:00:00 2001
From: JiayiFeng
Date: Tue, 10 Apr 2018 04:41:39 +0000
Subject: [PATCH 25/62] fix bugs
---
paddle/fluid/framework/parallel_executor.cc | 4 +---
paddle/fluid/operators/read_op.cc | 7 -------
.../operators/reader/create_threaded_reader_op.cc | 8 ++++----
python/paddle/fluid/layers/io.py | 13 +++++++------
4 files changed, 12 insertions(+), 20 deletions(-)
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 74945fb4f2..1bb089c344 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -115,14 +115,12 @@ void ParallelExecutor::BCastParamsToGPUs(
for (auto &var : vars) {
auto *main_var = main_scope->FindVar(var);
- if (!main_var->IsType()) {
+ if (main_var == nullptr || !main_var->IsType()) {
continue;
}
auto &main_tensor = main_var->Get();
-
auto &dims = main_tensor.dims();
-
if (paddle::platform::is_gpu_place(main_tensor.place())) {
size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc
index 2925b8a85d..4496110cf8 100644
--- a/paddle/fluid/operators/read_op.cc
+++ b/paddle/fluid/operators/read_op.cc
@@ -66,13 +66,6 @@ class ReadOp : public framework::OperatorBase {
std::vector out_arg_names = Outputs("Out");
std::vector ins;
reader->ReadNext(&ins);
- if (ins.empty()) {
- reader->ReInit();
- reader->ReadNext(&ins);
- PADDLE_ENFORCE(
- !ins.empty(),
- "Reader can not read the next data even it has been re-initialized.");
- }
PADDLE_ENFORCE_EQ(ins.size(), out_arg_names.size());
for (size_t i = 0; i < ins.size(); ++i) {
auto* out =
diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
index 854381e0ee..7b10135afc 100644
--- a/paddle/fluid/operators/reader/create_threaded_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
@@ -111,7 +111,7 @@ class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
"When 'unsafe_mode' is false, invoking 'HasNext()' or "
"'ReInit()' is not allowed to avoid unexpected bugs in "
"multi-thread environment.")
- .SetDefault(false);
+ .SetDefault(true);
AddComment(R"DOC(
CreateThreadedReader Operator
@@ -134,6 +134,6 @@ class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
} // namespace paddle
namespace reader = paddle::operators::reader;
-REGISTER_FILE_READER_OPERATOR(create_threaded_reader,
- reader::CreateThreadedReaderOp,
- reader::CreateThreadedReaderOpMaker);
+REGISTER_DECORATED_READER_OPERATOR(create_threaded_reader,
+ reader::CreateThreadedReaderOp,
+ reader::CreateThreadedReaderOpMaker);
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 4901521db5..d016ab9008 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -350,7 +350,7 @@ def open_recordio_file(filename,
main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)
if for_parallel:
- main_prog_var = for_parallel(reader=main_prog_var)
+ main_prog_var = parallelize(reader=main_prog_var)
return monkey_patch_reader_methods(main_prog_var)
@@ -435,12 +435,12 @@ def open_files(filenames,
reader=main_prog_reader, pass_num=pass_num)
if for_parallel:
- main_prog_reader = for_parallel(reader=main_prog_reader)
+ main_prog_reader = parallelize(reader=main_prog_reader)
return monkey_patch_reader_methods(main_prog_reader)
-def __create_unshared_decorated_reader__(op_type, reader, attrs={}):
+def __create_shared_decorated_reader__(op_type, reader, attrs):
var_name = unique_name(op_type)
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=var_name)
@@ -456,7 +456,7 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs={}):
return monkey_patch_reader_methods(main_prog_var)
-def __create_shared_decorated_reader__(op_type, reader, attrs={}):
+def __create_unshared_decorated_reader__(op_type, reader, attrs):
new_reader_name = unique_name(op_type)
main_blk = default_main_program().current_block()
new_reader = main_blk.create_var(name=new_reader_name)
@@ -488,8 +488,9 @@ def multi_pass(reader, pass_num):
'create_multi_pass_reader', reader, {'pass_num': int(pass_num)})
-def for_parallel(reader):
- return __create_shared_decorated_reader__('create_threaded_reader', reader)
+def parallelize(reader):
+ return __create_shared_decorated_reader__('create_threaded_reader', reader,
+ {})
def read_file(file_obj):
From d9a52223852a92d532ff2522cb648758511abe26 Mon Sep 17 00:00:00 2001
From: jshower
Date: Tue, 10 Apr 2018 04:57:30 +0000
Subject: [PATCH 26/62] code style
---
.../tests/book/test_label_semantic_roles.py | 67 ++++++++++---------
1 file changed, 34 insertions(+), 33 deletions(-)
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index 5fc64ea958..4f5d30ac00 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -70,14 +70,15 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
fluid.layers.embedding(
size=[word_dict_len, word_dim],
input=x,
- param_attr=fluid.ParamAttr(
- name=embedding_name, trainable=False)) for x in word_input
+ param_attr=fluid.ParamAttr(name=embedding_name, trainable=False))
+ for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0_layers = [
- fluid.layers.fc(input=emb, size=hidden_dim, act='tanh') for emb in emb_layers
+ fluid.layers.fc(input=emb, size=hidden_dim, act='tanh')
+ for emb in emb_layers
]
hidden_0 = fluid.layers.sums(input=hidden_0_layers)
@@ -163,8 +164,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
crf_cost = fluid.layers.linear_chain_crf(
input=feature_out,
label=target,
- param_attr=fluid.ParamAttr(
- name='crfw', learning_rate=mix_hidden_lr))
+ param_attr=fluid.ParamAttr(name='crfw', learning_rate=mix_hidden_lr))
avg_cost = fluid.layers.mean(crf_cost)
# TODO(qiao)
@@ -189,8 +189,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
train_data = paddle.batch(
- paddle.reader.shuffle(
- paddle.dataset.conll05.test(), buf_size=8192),
+ paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192),
batch_size=BATCH_SIZE)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
@@ -223,24 +222,25 @@ def train(use_cuda, save_dirname=None, is_local=True):
exe)
if batch_id % 10 == 0:
- print("avg_cost:" + str(cost) + " precision:" + str(
- precision) + " recall:" + str(recall) + " f1_score:" +
- str(f1_score) + " pass_precision:" + str(
- pass_precision) + " pass_recall:" + str(
- pass_recall) + " pass_f1_score:" + str(
- pass_f1_score))
+ print(
+ "avg_cost:" + str(cost) + " precision:" +
+ str(precision) + " recall:" + str(recall) +
+ " f1_score:" + str(f1_score) + " pass_precision:" + str(
+ pass_precision) + " pass_recall:" + str(pass_recall)
+ + " pass_f1_score:" + str(pass_f1_score))
if batch_id != 0:
- print("second per batch: " + str((time.time(
- ) - start_time) / batch_id))
+ print("second per batch: " + str(
+ (time.time() - start_time) / batch_id))
# Set the threshold low to speed up the CI test
if float(pass_precision) > 0.05:
if save_dirname is not None:
# TODO(liuyiqun): Change the target to crf_decode
- fluid.io.save_inference_model(save_dirname, [
- 'word_data', 'verb_data', 'ctx_n2_data',
- 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
- 'ctx_p2_data', 'mark_data'
- ], [feature_out], exe)
+ fluid.io.save_inference_model(
+ save_dirname, [
+ 'word_data', 'verb_data', 'ctx_n2_data',
+ 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
+ 'ctx_p2_data', 'mark_data'
+ ], [feature_out], exe)
return
batch_id = batch_id + 1
@@ -320,19 +320,20 @@ def infer(use_cuda, save_dirname=None):
assert feed_target_names[6] == 'ctx_p2_data'
assert feed_target_names[7] == 'mark_data'
- results = exe.run(inference_program,
- feed={
- feed_target_names[0]: word,
- feed_target_names[1]: pred,
- feed_target_names[2]: ctx_n2,
- feed_target_names[3]: ctx_n1,
- feed_target_names[4]: ctx_0,
- feed_target_names[5]: ctx_p1,
- feed_target_names[6]: ctx_p2,
- feed_target_names[7]: mark
- },
- fetch_list=fetch_targets,
- return_numpy=False)
+ results = exe.run(
+ inference_program,
+ feed={
+ feed_target_names[0]: word,
+ feed_target_names[1]: pred,
+ feed_target_names[2]: ctx_n2,
+ feed_target_names[3]: ctx_n1,
+ feed_target_names[4]: ctx_0,
+ feed_target_names[5]: ctx_p1,
+ feed_target_names[6]: ctx_p2,
+ feed_target_names[7]: mark
+ },
+ fetch_list=fetch_targets,
+ return_numpy=False)
print(results[0].lod())
np_data = np.array(results[0])
print("Inference Shape: ", np_data.shape)
From 93940642ac1f52d28743a9dbdc2940a4d986f612 Mon Sep 17 00:00:00 2001
From: weixing02 <564445201@qq.com>
Date: Tue, 10 Apr 2018 14:28:50 +0800
Subject: [PATCH 27/62] Adjust
---
python/paddle/fluid/initializer.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index ad15c32a35..4e132ed261 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -18,7 +18,8 @@ import contextlib
__all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu',
- 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer', 'NormalInitializer', 'XavierInitializer'
+ 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
+ 'NormalInitializer', 'XavierInitializer'
]
_force_init_on_cpu_ = False
From 7c1434dd73d367932e98ae569093183d33b7e5fb Mon Sep 17 00:00:00 2001
From: jshower
Date: Tue, 10 Apr 2018 07:36:15 +0000
Subject: [PATCH 28/62] code style
---
.../tests/book/test_label_semantic_roles.py | 64 +++++++++----------
1 file changed, 32 insertions(+), 32 deletions(-)
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index 4f5d30ac00..ace2e39ba4 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -70,8 +70,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
fluid.layers.embedding(
size=[word_dict_len, word_dim],
input=x,
- param_attr=fluid.ParamAttr(name=embedding_name, trainable=False))
- for x in word_input
+ param_attr=fluid.ParamAttr(
+ name=embedding_name, trainable=False)) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
@@ -164,7 +164,8 @@ def train(use_cuda, save_dirname=None, is_local=True):
crf_cost = fluid.layers.linear_chain_crf(
input=feature_out,
label=target,
- param_attr=fluid.ParamAttr(name='crfw', learning_rate=mix_hidden_lr))
+ param_attr=fluid.ParamAttr(
+ name='crfw', learning_rate=mix_hidden_lr))
avg_cost = fluid.layers.mean(crf_cost)
# TODO(qiao)
@@ -189,7 +190,8 @@ def train(use_cuda, save_dirname=None, is_local=True):
num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
train_data = paddle.batch(
- paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192),
+ paddle.reader.shuffle(
+ paddle.dataset.conll05.test(), buf_size=8192),
batch_size=BATCH_SIZE)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
@@ -222,25 +224,24 @@ def train(use_cuda, save_dirname=None, is_local=True):
exe)
if batch_id % 10 == 0:
- print(
- "avg_cost:" + str(cost) + " precision:" +
- str(precision) + " recall:" + str(recall) +
- " f1_score:" + str(f1_score) + " pass_precision:" + str(
- pass_precision) + " pass_recall:" + str(pass_recall)
- + " pass_f1_score:" + str(pass_f1_score))
+ print("avg_cost:" + str(cost) + " precision:" + str(
+ precision) + " recall:" + str(recall) + " f1_score:" +
+ str(f1_score) + " pass_precision:" + str(
+ pass_precision) + " pass_recall:" + str(
+ pass_recall) + " pass_f1_score:" + str(
+ pass_f1_score))
if batch_id != 0:
- print("second per batch: " + str(
- (time.time() - start_time) / batch_id))
+ print("second per batch: " + str((time.time(
+ ) - start_time) / batch_id))
# Set the threshold low to speed up the CI test
if float(pass_precision) > 0.05:
if save_dirname is not None:
# TODO(liuyiqun): Change the target to crf_decode
- fluid.io.save_inference_model(
- save_dirname, [
- 'word_data', 'verb_data', 'ctx_n2_data',
- 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
- 'ctx_p2_data', 'mark_data'
- ], [feature_out], exe)
+ fluid.io.save_inference_model(save_dirname, [
+ 'word_data', 'verb_data', 'ctx_n2_data',
+ 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
+ 'ctx_p2_data', 'mark_data'
+ ], [feature_out], exe)
return
batch_id = batch_id + 1
@@ -320,20 +321,19 @@ def infer(use_cuda, save_dirname=None):
assert feed_target_names[6] == 'ctx_p2_data'
assert feed_target_names[7] == 'mark_data'
- results = exe.run(
- inference_program,
- feed={
- feed_target_names[0]: word,
- feed_target_names[1]: pred,
- feed_target_names[2]: ctx_n2,
- feed_target_names[3]: ctx_n1,
- feed_target_names[4]: ctx_0,
- feed_target_names[5]: ctx_p1,
- feed_target_names[6]: ctx_p2,
- feed_target_names[7]: mark
- },
- fetch_list=fetch_targets,
- return_numpy=False)
+ results = exe.run(inference_program,
+ feed={
+ feed_target_names[0]: word,
+ feed_target_names[1]: pred,
+ feed_target_names[2]: ctx_n2,
+ feed_target_names[3]: ctx_n1,
+ feed_target_names[4]: ctx_0,
+ feed_target_names[5]: ctx_p1,
+ feed_target_names[6]: ctx_p2,
+ feed_target_names[7]: mark
+ },
+ fetch_list=fetch_targets,
+ return_numpy=False)
print(results[0].lod())
np_data = np.array(results[0])
print("Inference Shape: ", np_data.shape)
From 284a2137742f63e8a70f4d98805328edc067f054 Mon Sep 17 00:00:00 2001
From: JiayiFeng
Date: Tue, 10 Apr 2018 07:46:03 +0000
Subject: [PATCH 29/62] fix a name conflict
---
python/paddle/fluid/layers/io.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index dbba1a46eb..7b590fb510 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -350,7 +350,7 @@ def open_recordio_file(filename,
main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)
if for_parallel:
- main_prog_var = for_parallel(reader=main_prog_var)
+ main_prog_var = parallel(reader=main_prog_var)
return monkey_patch_reader_methods(main_prog_var)
@@ -435,7 +435,7 @@ def open_files(filenames,
reader=main_prog_reader, pass_num=pass_num)
if for_parallel:
- main_prog_reader = for_parallel(reader=main_prog_reader)
+ main_prog_reader = parallel(reader=main_prog_reader)
return monkey_patch_reader_methods(main_prog_reader)
@@ -474,7 +474,7 @@ def multi_pass(reader, pass_num):
{'pass_num': int(pass_num)})
-def for_parallel(reader):
+def parallel(reader):
return __create_decorated_reader__('create_threaded_reader', reader)
From 0bf799a52388dd77743623dcb2d1ebacb352858b Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Tue, 10 Apr 2018 17:00:06 +0800
Subject: [PATCH 30/62] wip testing
---
paddle/fluid/framework/details/CMakeLists.txt | 2 +-
.../framework/details/multi_devices_graph_builder.cc | 10 ++++------
.../framework/details/multi_devices_graph_builder.h | 7 ++-----
paddle/fluid/framework/parallel_executor.h | 4 ++--
paddle/fluid/operators/detail/serde_test.cc | 2 +-
paddle/fluid/pybind/pybind.cc | 1 +
python/paddle/fluid/parallel_executor.py | 7 +++++--
7 files changed, 16 insertions(+), 17 deletions(-)
diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt
index caaf418076..85b649b293 100644
--- a/paddle/fluid/framework/details/CMakeLists.txt
+++ b/paddle/fluid/framework/details/CMakeLists.txt
@@ -16,7 +16,7 @@ else()
set(multi_devices_graph_builder_deps)
endif()
cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
- scale_loss_grad_op_handle ${multi_devices_graph_builder_deps})
+ scale_loss_grad_op_handle send_op_handle ${multi_devices_graph_builder_deps})
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto)
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
simple_threadpool device_context)
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index 8a28b18715..8a53270110 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -35,22 +35,20 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
const std::string &loss_var_name,
const std::unordered_set ¶ms,
const std::vector &local_scopes,
- platform::NCCLContextMap *nccl_ctxs, bool distributed)
+ platform::NCCLContextMap *nccl_ctxs)
: loss_var_name_(loss_var_name),
places_(places),
local_scopes_(local_scopes),
- distributed_(distributed),
nccl_ctxs_(nccl_ctxs) {
#else
MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
const std::vector &places,
const std::string &loss_var_name,
const std::unordered_set ¶ms,
- const std::vector &local_scopes, bool distributed)
+ const std::vector &local_scopes)
: loss_var_name_(loss_var_name),
places_(places),
- local_scopes_(local_scopes),
- distributed_(distributed) {
+ local_scopes_(local_scopes) {
#endif
for (auto &p : params) {
grad_names_.insert(GradVarName(p));
@@ -99,7 +97,7 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build(
// append send op if program is distributed trainer main program.
// always use the first device
- if (is_forwarding && distributed_ && op->Type() == "send") {
+ if (!is_forwarding && op->Type() == "send") {
auto &p = places_[0];
auto *s = local_scopes_[0];
size_t i = 0;
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h
index 004d6d50ab..de34caab1b 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -34,14 +34,12 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
const std::string &loss_var_name,
const std::unordered_set ¶ms,
const std::vector &local_scopes,
- platform::NCCLContextMap *nccl_ctxs,
- bool distributed = false);
+ platform::NCCLContextMap *nccl_ctxs);
#else
MultiDevSSAGraphBuilder(const std::vector &places,
const std::string &loss_var_name,
const std::unordered_set ¶ms,
- const std::vector &local_scopes,
- bool distributed = false);
+ const std::vector &local_scopes);
#endif
std::unique_ptr Build(const ProgramDesc &program) const override;
@@ -55,7 +53,6 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
const std::vector &places_;
const std::vector &local_scopes_;
std::unordered_set grad_names_;
- bool distributed_;
#ifdef PADDLE_WITH_CUDA
platform::NCCLContextMap *nccl_ctxs_;
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index c048c3865f..b4f16dba85 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -48,13 +48,13 @@ class ParallelExecutor {
const std::string& fetched_var_name,
const std::unordered_map& feed_tensors);
+ void BCastParamsToGPUs(const std::unordered_set& vars) const;
+
private:
void SplitTensorToPlaces(
const std::unordered_map& feed_tensors);
ParallelExecutorPrivate* member_;
-
- void BCastParamsToGPUs(const std::unordered_set& vars) const;
};
} // namespace framework
diff --git a/paddle/fluid/operators/detail/serde_test.cc b/paddle/fluid/operators/detail/serde_test.cc
index f8cae6b26a..cb5f895834 100644
--- a/paddle/fluid/operators/detail/serde_test.cc
+++ b/paddle/fluid/operators/detail/serde_test.cc
@@ -107,7 +107,7 @@ void RunSerdeTestSelectedRows(platform::Place place) {
for (int i = 0; i < tensor_numel; ++i) {
EXPECT_FLOAT_EQ(tensor_data2[i], 32.7);
}
- for (int64_t i = 0; i < rows2->size(); ++i) {
+ for (size_t i = 0; i < rows2->size(); ++i) {
EXPECT_EQ(rows_data2[i], i);
}
EXPECT_EQ(slr2->height(), 1000);
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 3924040455..a9a5d87d77 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -554,6 +554,7 @@ All parameter, weight, gradient are variables in Paddle.
bcast_vars, main_program, loss_var_name,
scope, local_scopes, allow_op_delay);
})
+ .def("bcast_params", &ParallelExecutor::BCastParamsToGPUs)
.def("local_scopes",
[](ParallelExecutor &self) -> std::vector * {
return &self.GetLocalScopes();
diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py
index b93f2f974c..a23cc9b772 100644
--- a/python/paddle/fluid/parallel_executor.py
+++ b/python/paddle/fluid/parallel_executor.py
@@ -99,7 +99,7 @@ class ParallelExecutor(object):
local_scopes = share_vars_from.executor.local_scopes(
) if share_vars_from else []
- persistable_vars = [
+ self.persistable_vars = [
v.name
for v in filter(lambda var: var.persistable, main.list_vars())
]
@@ -112,7 +112,7 @@ class ParallelExecutor(object):
p.name for p in main.global_block().iter_parameters()
if not p.stop_gradient
]),
- set(persistable_vars),
+ set(self.persistable_vars),
main.desc,
loss_name if loss_name else '',
scope,
@@ -142,3 +142,6 @@ class ParallelExecutor(object):
self.executor.run(fetch_list, fetch_var_name, feed_tensor_dict)
arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array()
return [arr[i] for i in range(len(arr))]
+
+ def bcast_params(self):
+ self.executor.bcast_params(set(self.persistable_vars))
From ad6ddf533cfb1542283f741cddb78835fb3b8658 Mon Sep 17 00:00:00 2001
From: jshower
Date: Tue, 10 Apr 2018 09:23:11 +0000
Subject: [PATCH 31/62] for ci
---
python/paddle/fluid/tests/book/test_label_semantic_roles.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index ace2e39ba4..4d8bca4d24 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -37,7 +37,7 @@ depth = 8
mix_hidden_lr = 1e-3
IS_SPARSE = True
-PASS_NUM = 10
+PASS_NUM = 100
BATCH_SIZE = 10
embedding_name = 'emb'
@@ -234,7 +234,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
print("second per batch: " + str((time.time(
) - start_time) / batch_id))
# Set the threshold low to speed up the CI test
- if float(pass_precision) > 0.05:
+ if float(pass_precision) > 0.01:
if save_dirname is not None:
# TODO(liuyiqun): Change the target to crf_decode
fluid.io.save_inference_model(save_dirname, [
From 40e3fe173ca06a8196c3a24906923833cfb0f372 Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Tue, 10 Apr 2018 17:46:53 +0800
Subject: [PATCH 32/62] Make cuda_helper.h Pass cpplint
---
paddle/fluid/platform/cuda_helper.h | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/paddle/fluid/platform/cuda_helper.h b/paddle/fluid/platform/cuda_helper.h
index 881d611d4a..8758af0804 100644
--- a/paddle/fluid/platform/cuda_helper.h
+++ b/paddle/fluid/platform/cuda_helper.h
@@ -33,22 +33,26 @@ constexpr int PADDLE_CUDA_NUM_THREADS = 512;
USE_CUDA_ATOMIC(Add, float);
USE_CUDA_ATOMIC(Add, int);
USE_CUDA_ATOMIC(Add, unsigned int);
-USE_CUDA_ATOMIC(Add, unsigned long long int);
+// CUDA API uses unsigned long long int, we cannot use uint64_t here.
+// It because unsigned long long int is not necessarily uint64_t
+USE_CUDA_ATOMIC(Add, unsigned long long int); // NOLINT
CUDA_ATOMIC_WRAPPER(Add, int64_t) {
- static_assert(sizeof(int64_t) == sizeof(long long int),
+ // Here, we check long long int must be int64_t.
+ static_assert(sizeof(int64_t) == sizeof(long long int), // NOLINT
"long long should be int64");
- return CudaAtomicAdd(reinterpret_cast(address),
- static_cast(val));
+ return CudaAtomicAdd(
+ reinterpret_cast(address), // NOLINT
+ static_cast(val)); // NOLINT
}
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
USE_CUDA_ATOMIC(Add, double);
#else
CUDA_ATOMIC_WRAPPER(Add, double) {
- unsigned long long int* address_as_ull =
- reinterpret_cast(address);
- unsigned long long int old = *address_as_ull, assumed;
+ unsigned long long int* address_as_ull = // NOLINT
+ reinterpret_cast(address); // NOLINT
+ unsigned long long int old = *address_as_ull, assumed; // NOLINT
do {
assumed = old;
From ce08dc8751b5f605ce6aece70ce6f16af72f4759 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Tue, 10 Apr 2018 20:40:43 +0800
Subject: [PATCH 33/62] have stream removed error
---
.../details/multi_devices_graph_builder.cc | 34 ++++++++-----------
.../details/multi_devices_graph_builder.h | 2 +-
.../fluid/framework/details/send_op_handle.cc | 10 +++---
.../fluid/framework/details/send_op_handle.h | 4 +--
python/paddle/fluid/distribute_transpiler.py | 1 +
python/paddle/fluid/parallel_executor.py | 4 ++-
6 files changed, 24 insertions(+), 31 deletions(-)
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index 8a53270110..0ebcd627bd 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -57,8 +57,11 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, OpDesc *op,
const platform::Place &p,
- const size_t &i) const {
+ const size_t &i,
+ bool create_output) const {
auto *op_handle = result->ops_.back().get();
+ op_handle->dev_ctxes_[p] = const_cast(
+ platform::DeviceContextPool::Instance().Get(p));
auto var_names = op->InputArgumentNames();
@@ -66,10 +69,12 @@ void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, OpDesc *op,
VarHandle *var = CreateOrGetLatestVarHandle(result, each_var_name, p, i);
op_handle->AddInput(var);
}
- var_names = op->OutputArgumentNames();
+ if (create_output) {
+ var_names = op->OutputArgumentNames();
- for (auto &each_var_name : var_names) {
- CreateOpOutput(result, op_handle, each_var_name, p, i);
+ for (auto &each_var_name : var_names) {
+ CreateOpOutput(result, op_handle, each_var_name, p, i);
+ }
}
}
@@ -100,9 +105,11 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build(
if (!is_forwarding && op->Type() == "send") {
auto &p = places_[0];
auto *s = local_scopes_[0];
- size_t i = 0;
- result.ops_.emplace_back(new SendOpHandle(*op, s, p));
- CreateOpHandleIOs(&result, op, p, i);
+ // FIXME(wuyi): send op always copy from GPU 0
+ result.ops_.emplace_back(new SendOpHandle(*op, s));
+ // Create inputs for output on original place and no ssa output
+ // is created for send op.
+ CreateOpHandleIOs(&result, op, p, 0, false);
continue;
}
@@ -112,23 +119,10 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build(
result.ops_.emplace_back(new ComputationOpHandle(*op, s, p));
auto *op_handle = result.ops_.back().get();
- op_handle->dev_ctxes_[p] = const_cast(
- platform::DeviceContextPool::Instance().Get(p));
-
CreateOpHandleIOs(&result, op, p, i);
- // auto var_names = op->InputArgumentNames();
- // for (auto &each_var_name : var_names) {
- // VarHandle *var =
- // CreateOrGetLatestVarHandle(&result, each_var_name, p, i);
- // op_handle->AddInput(var);
- // }
auto var_names = op->OutputArgumentNames();
- // for (auto &each_var_name : var_names) {
- // CreateOpOutput(&result, op_handle, each_var_name, p, i);
- // }
-
if (is_forwarding) {
if (var_names.size() == 1 && var_names[0] == loss_var_name_) {
// Insert ScaleCost OpHandle
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h
index de34caab1b..137c817fde 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -46,7 +46,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
private:
void CreateOpHandleIOs(SSAGraph *result, OpDesc *op, const platform::Place &p,
- const size_t &i) const;
+ const size_t &i, bool create_output = true) const;
private:
std::string loss_var_name_;
diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
index ae5637b804..caacfa6b1e 100644
--- a/paddle/fluid/framework/details/send_op_handle.cc
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -19,11 +19,9 @@ namespace framework {
namespace details {
SendOpHandle::SendOpHandle(const framework::OpDesc &op_desc,
- const Scope *local_scope,
- const platform::Place &place)
+ const Scope *local_scope)
: op_(framework::OpRegistry::CreateOp(op_desc)),
- local_scope_(local_scope),
- place_(place) {}
+ local_scope_(local_scope) {}
void SendOpHandle::RunImpl() {
// Wait input done
@@ -31,8 +29,8 @@ void SendOpHandle::RunImpl() {
auto &p = static_cast(in)->place_;
in->generated_op_->Wait(dev_ctxes_[p]);
}
-
- op_->Run(*local_scope_, place_);
+ platform::CPUPlace cpu;
+ op_->Run(*local_scope_, cpu);
}
std::string SendOpHandle::Name() const { return "send"; }
diff --git a/paddle/fluid/framework/details/send_op_handle.h b/paddle/fluid/framework/details/send_op_handle.h
index e7857c1f23..8a7b62ba1c 100644
--- a/paddle/fluid/framework/details/send_op_handle.h
+++ b/paddle/fluid/framework/details/send_op_handle.h
@@ -31,10 +31,8 @@ namespace details {
struct SendOpHandle : public OpHandleBase {
std::unique_ptr op_;
const Scope* local_scope_;
- const platform::Place& place_;
- SendOpHandle(const framework::OpDesc& op_desc, const Scope* local_scope,
- const platform::Place& place);
+ SendOpHandle(const framework::OpDesc& op_desc, const Scope* local_scope);
std::string Name() const override;
diff --git a/python/paddle/fluid/distribute_transpiler.py b/python/paddle/fluid/distribute_transpiler.py
index 0ec3ebc7e3..e18ace844e 100644
--- a/python/paddle/fluid/distribute_transpiler.py
+++ b/python/paddle/fluid/distribute_transpiler.py
@@ -255,6 +255,7 @@ class DistributeTranspiler:
def get_trainer_program(self):
# remove optimize ops and add a send op to main_program
self.program.global_block().delete_ops(self.optimize_ops)
+ self.program.sync_with_cpp()
# FIXME(typhoonzero): serialize once will fix error occurs when clone.
self.program.__str__()
return self.program
diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py
index a23cc9b772..c709f364c1 100644
--- a/python/paddle/fluid/parallel_executor.py
+++ b/python/paddle/fluid/parallel_executor.py
@@ -101,7 +101,9 @@ class ParallelExecutor(object):
self.persistable_vars = [
v.name
- for v in filter(lambda var: var.persistable, main.list_vars())
+ for v in filter(lambda var: \
+ var.persistable and var.type != core.VarDesc.VarType.RAW,
+ main.list_vars())
]
self.executor = core.ParallelExecutor(
From a84b81502cd26c02fbc3b4c46d751b0363a5ff46 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 11 Apr 2018 01:30:56 +0800
Subject: [PATCH 34/62] Remove Readers' HasNext()
---
paddle/fluid/framework/lod_tensor.cc | 32 ++++----
paddle/fluid/framework/lod_tensor.h | 7 +-
paddle/fluid/framework/lod_tensor_test.cc | 18 ++---
paddle/fluid/framework/reader.h | 13 +---
.../reader/create_double_buffer_reader_op.cc | 38 +++++-----
.../reader/create_multi_pass_reader_op.cc | 16 +---
.../reader/create_random_data_generator_op.cc | 4 +-
.../reader/create_recordio_file_reader_op.cc | 10 +--
.../reader/create_shuffle_reader_op.cc | 24 +++---
.../reader/create_threaded_reader_op.cc | 75 ++++---------------
.../fluid/operators/reader/open_files_op.cc | 25 ++++---
paddle/fluid/pybind/pybind.cc | 1 -
paddle/fluid/pybind/recordio.cc | 2 +-
python/paddle/fluid/layers/io.py | 10 +--
14 files changed, 105 insertions(+), 170 deletions(-)
diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc
index 8155cb55a4..a56674cbe2 100644
--- a/paddle/fluid/framework/lod_tensor.cc
+++ b/paddle/fluid/framework/lod_tensor.cc
@@ -12,9 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "paddle/fluid/framework/lod_tensor.h"
+#include
+#include
+#include
+#include
+
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
@@ -22,11 +27,6 @@ limitations under the License. */
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
-#include
-#include
-#include
-#include
-
namespace paddle {
namespace framework {
@@ -294,7 +294,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream(is, static_cast(tensor), dev_ctx);
}
-void WriteToRecordIO(recordio::Writer &writer,
+void WriteToRecordIO(recordio::Writer *writer,
const std::vector &tensor,
const platform::DeviceContext &dev_ctx) {
std::stringstream buffer;
@@ -303,18 +303,20 @@ void WriteToRecordIO(recordio::Writer &writer,
for (auto &each : tensor) {
SerializeToStream(buffer, each, dev_ctx);
}
- writer.Write(buffer.str());
+ writer->Write(buffer.str());
}
std::vector ReadFromRecordIO(
- recordio::Scanner &scanner, const platform::DeviceContext &dev_ctx) {
- std::istringstream sin(scanner.Next());
- uint32_t sz;
- sin.read(reinterpret_cast(&sz), sizeof(uint32_t));
+ recordio::Scanner *scanner, const platform::DeviceContext &dev_ctx) {
std::vector result;
- result.resize(sz);
- for (uint32_t i = 0; i < sz; ++i) {
- DeserializeFromStream(sin, &result[i], dev_ctx);
+ if (scanner->HasNext()) {
+ std::istringstream sin(scanner->Next());
+ uint32_t sz;
+ sin.read(reinterpret_cast(&sz), sizeof(uint32_t));
+ result.resize(sz);
+ for (uint32_t i = 0; i < sz; ++i) {
+ DeserializeFromStream(sin, &result[i], dev_ctx);
+ }
}
return result;
}
diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h
index 4f130d2659..1159fee39b 100644
--- a/paddle/fluid/framework/lod_tensor.h
+++ b/paddle/fluid/framework/lod_tensor.h
@@ -15,6 +15,9 @@ limitations under the License. */
#pragma once
#include
+#include
+#include
+#include
#ifdef PADDLE_WITH_CUDA
#include
#include
@@ -216,12 +219,12 @@ void SerializeToStream(std::ostream& os, const LoDTensor& tensor,
void DeserializeFromStream(std::istream& is, LoDTensor* tensor,
const platform::DeviceContext& dev_ctx);
-extern void WriteToRecordIO(recordio::Writer& writer,
+extern void WriteToRecordIO(recordio::Writer* writer,
const std::vector& tensor,
const platform::DeviceContext& dev_ctx);
extern std::vector ReadFromRecordIO(
- recordio::Scanner& scanner, const platform::DeviceContext& dev_ctx);
+ recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
} // namespace framework
} // namespace paddle
diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc
index e691e29383..97ab98f09b 100644
--- a/paddle/fluid/framework/lod_tensor_test.cc
+++ b/paddle/fluid/framework/lod_tensor_test.cc
@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "paddle/fluid/framework/lod_tensor.h"
-
-#include "paddle/fluid/recordio/scanner.h"
-#include "paddle/fluid/recordio/writer.h"
-
#include
#include
#include
#include
#include
+#include "paddle/fluid/framework/lod_tensor.h"
+
+#include "paddle/fluid/recordio/scanner.h"
+#include "paddle/fluid/recordio/writer.h"
+
namespace paddle {
namespace framework {
@@ -240,8 +240,8 @@ TEST(LoDTensor, RecordIO) {
*platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
{
recordio::Writer writer(stream, recordio::Compressor::kSnappy);
- WriteToRecordIO(writer, {tensor, tensor}, ctx);
- WriteToRecordIO(writer, {tensor, tensor}, ctx);
+ WriteToRecordIO(&writer, {tensor, tensor}, ctx);
+ WriteToRecordIO(&writer, {tensor, tensor}, ctx);
writer.Flush();
}
@@ -254,11 +254,11 @@ TEST(LoDTensor, RecordIO) {
{
std::unique_ptr stream_ptr(stream);
recordio::Scanner scanner(std::move(stream_ptr));
- auto tensors = ReadFromRecordIO(scanner, ctx);
+ auto tensors = ReadFromRecordIO(&scanner, ctx);
ASSERT_EQ(tensors.size(), 2);
assert_tensor_ok(tensors[0]);
assert_tensor_ok(tensors[1]);
- tensors = ReadFromRecordIO(scanner, ctx);
+ tensors = ReadFromRecordIO(&scanner, ctx);
ASSERT_EQ(tensors.size(), 2);
assert_tensor_ok(tensors[0]);
assert_tensor_ok(tensors[1]);
diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h
index 3573b99bec..3a413941df 100644
--- a/paddle/fluid/framework/reader.h
+++ b/paddle/fluid/framework/reader.h
@@ -14,14 +14,13 @@
#pragma once
+#include
+#include
+
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/platform/place.h"
-#include
-#include
-#include
-
namespace paddle {
namespace framework {
@@ -31,8 +30,6 @@ class ReaderBase {
virtual void ReInit() = 0;
- virtual bool HasNext() const = 0;
-
virtual ~ReaderBase();
};
@@ -44,8 +41,6 @@ class DecoratedReader : public ReaderBase {
void ReInit() override { reader_->ReInit(); }
- bool HasNext() const override { return reader_->HasNext(); }
-
protected:
ReaderBase* reader_;
};
@@ -80,8 +75,6 @@ class ReaderHolder {
reader_->ReInit();
}
- bool HasNext() const { return reader_->HasNext(); }
-
private:
std::unique_ptr reader_;
};
diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
index d9f799f14d..33a50b5ceb 100644
--- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc
@@ -63,13 +63,14 @@ class DoubleBufferReader : public framework::DecoratedReader {
StartPrefetcher();
}
- bool HasNext() const override;
void ReadNext(std::vector* out) override;
void ReInit() override;
~DoubleBufferReader() { EndPrefetcher(); }
private:
+ bool HasNext() const;
+
void StartPrefetcher() {
channel_ = framework::MakeChannel- (kChannelSize);
prefetcher_ = std::thread([this] { PrefetchThreadFunc(); });
@@ -149,22 +150,15 @@ class CreateDoubleBufferReaderOpMaker : public DecoratedReaderMakerBase {
}
};
-bool DoubleBufferReader::HasNext() const {
- while (!channel_->IsClosed() && !channel_->CanReceive()) {
- }
- return channel_->CanReceive();
-}
-
void DoubleBufferReader::ReadNext(std::vector* out) {
- if (!HasNext()) {
- PADDLE_THROW("There is no next data!");
- }
-
- Item batch;
- channel_->Receive(&batch);
- *out = batch.payloads_;
- if (batch.ctx_) {
- batch.ctx_->Wait();
+ out->clear();
+ if (HasNext()) {
+ Item batch;
+ channel_->Receive(&batch);
+ *out = batch.payloads_;
+ if (batch.ctx_) {
+ batch.ctx_->Wait();
+ }
}
}
@@ -174,16 +168,26 @@ void DoubleBufferReader::ReInit() {
StartPrefetcher();
}
+bool DoubleBufferReader::HasNext() const {
+ while (!channel_->IsClosed() && !channel_->CanReceive()) {
+ }
+ return channel_->CanReceive();
+}
+
void DoubleBufferReader::PrefetchThreadFunc() {
VLOG(5) << "A new prefetch thread starts.";
std::vector> cpu_tensor_cache(kCacheSize);
std::vector> gpu_tensor_cache(kCacheSize);
size_t cached_tensor_id = 0;
- while (reader_->HasNext()) {
+ while (true) {
Item batch;
auto& cpu_batch = cpu_tensor_cache[cached_tensor_id];
reader_->ReadNext(&cpu_batch);
+ if (cpu_batch.empty()) {
+ // The underlying reader have no next data.
+ break;
+ }
if (platform::is_gpu_place(place_)) {
auto& gpu_batch = gpu_tensor_cache[cached_tensor_id];
auto* gpu_ctx = ctxs_[cached_tensor_id].get();
diff --git a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc
index b72ccc77a3..0573345ba5 100644
--- a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc
@@ -25,22 +25,12 @@ class MultiPassReader : public framework::DecoratedReader {
: DecoratedReader(reader), pass_num_(pass_num), pass_count_(0) {}
void ReadNext(std::vector* out) override {
- if (!HasNext()) {
- PADDLE_THROW("There is no next data!");
- }
reader_->ReadNext(out);
- }
-
- bool HasNext() const override {
- if (reader_->HasNext()) {
- return true;
- } else {
+ if (out->empty()) {
++pass_count_;
- if (pass_count_ >= pass_num_) {
- return false;
- } else {
+ if (pass_count_ < pass_num_) {
reader_->ReInit();
- return true;
+ reader_->ReadNext(out);
}
}
}
diff --git a/paddle/fluid/operators/reader/create_random_data_generator_op.cc b/paddle/fluid/operators/reader/create_random_data_generator_op.cc
index 95d8674c08..d1cb8e47da 100644
--- a/paddle/fluid/operators/reader/create_random_data_generator_op.cc
+++ b/paddle/fluid/operators/reader/create_random_data_generator_op.cc
@@ -52,8 +52,6 @@ class RandomDataGenerator : public framework::ReaderBase {
void ReInit() override { return; }
- bool HasNext() const override { return true; }
-
private:
float min_;
float max_;
@@ -74,7 +72,7 @@ class CreateRandomDataGeneratorOp : public framework::OperatorBase {
const auto& ranks = Attr>("ranks");
PADDLE_ENFORCE(!shape_concat.empty() && !ranks.empty());
PADDLE_ENFORCE_EQ(std::accumulate(ranks.begin(), ranks.end(), 0),
- int(shape_concat.size()),
+ static_cast(shape_concat.size()),
"The accumulate of all ranks should be equal to the "
"shape concat's length.");
std::vector shapes = RestoreShapes(shape_concat, ranks);
diff --git a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc
index adaa0b9e5f..2ae2972556 100644
--- a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include
-#include
#include "paddle/fluid/operators/reader/reader_op_registry.h"
#include "paddle/fluid/recordio/scanner.h"
@@ -35,17 +33,15 @@ class RecordIOFileReader : public framework::FileReader {
LOG(INFO) << "Creating file reader" << filename;
}
- bool HasNext() const override { return scanner_.HasNext(); }
-
void ReInit() override { scanner_.Reset(); }
protected:
void ReadNextImpl(std::vector* out) override {
if (ThreadSafe) {
std::lock_guard guard(*mutex_);
- *out = framework::ReadFromRecordIO(scanner_, dev_ctx_);
+ *out = framework::ReadFromRecordIO(&scanner_, dev_ctx_);
} else {
- *out = framework::ReadFromRecordIO(scanner_, dev_ctx_);
+ *out = framework::ReadFromRecordIO(&scanner_, dev_ctx_);
}
}
@@ -66,7 +62,7 @@ class CreateRecordIOReaderOp : public framework::OperatorBase {
const auto& ranks = Attr>("ranks");
PADDLE_ENFORCE(!shape_concat.empty() && !ranks.empty());
PADDLE_ENFORCE_EQ(std::accumulate(ranks.begin(), ranks.end(), 0),
- int(shape_concat.size()),
+ static_cast(shape_concat.size()),
"The accumulate of all ranks should be equal to the "
"shape concat's length.");
std::string filename = Attr("filename");
diff --git a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc
index b164ce232d..13825d6591 100644
--- a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc
@@ -30,35 +30,33 @@ class ShuffleReader : public framework::DecoratedReader {
std::random_device device;
seed_ = device();
}
- ReadIntoBuffers();
+ ReloadBuffer();
}
void ReadNext(std::vector* out) override {
- if (!HasNext()) {
- PADDLE_THROW("There is no next data!");
- }
+ out->clear();
if (iteration_pos_ >= buffer_.size()) {
VLOG(10) << "Resetting shuffle buffer";
- ReadIntoBuffers();
+ ReloadBuffer();
+ if (buffer_.empty()) {
+ return;
+ }
}
*out = buffer_[iteration_pos_++];
}
- bool HasNext() const override {
- return iteration_pos_ < buffer_.size() || reader_->HasNext();
- }
-
private:
- void ReadIntoBuffers() {
+ void ReloadBuffer() {
buffer_.clear();
buffer_.reserve(buffer_size_);
iteration_pos_ = 0;
for (size_t i = 0; i < buffer_size_; ++i) {
- if (!reader_->HasNext()) {
+ std::vector ins;
+ reader_->ReadNext(&ins);
+ if (ins.empty()) {
break;
}
- buffer_.emplace_back();
- reader_->ReadNext(&buffer_.back());
+ buffer_.emplace_back(ins);
}
std::mt19937 g(seed_);
std::shuffle(buffer_.begin(), buffer_.end(), g);
diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
index 7b10135afc..cbf709d5e7 100644
--- a/paddle/fluid/operators/reader/create_threaded_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_threaded_reader_op.cc
@@ -21,67 +21,27 @@ namespace reader {
class ThreadedReader : public framework::DecoratedReader {
public:
- ThreadedReader(ReaderBase* reader, bool unsafe_mode)
- : DecoratedReader(reader), unsafe_mode_(unsafe_mode) {}
+ ThreadedReader(ReaderBase* reader, bool safe_mode)
+ : DecoratedReader(reader), safe_mode_(safe_mode) {}
void ReadNext(std::vector* out) override {
std::lock_guard lock(mutex_);
- if (!unsafe_mode_) {
- if (!reader_->HasNext()) {
- PADDLE_THROW("There is no next data!");
- }
- reader_->ReadNext(out);
- } else {
- auto& thread_buffer = thread_buffers_[std::this_thread::get_id()];
- if (thread_buffer.empty()) {
- PADDLE_THROW(
- "thread_buffer is empty! HasNext() must be invoked before "
- "ReadNext() in the same thread.");
- }
- *out = thread_buffer;
- thread_buffer.clear();
- }
- }
-
- bool HasNext() const override {
- if (!unsafe_mode_) {
- PADDLE_THROW(
- "ThreadedReader::HasNext() is disabled when 'unsafe_mode' is false.");
- }
- std::thread::id thread_id = std::this_thread::get_id();
- std::lock_guard lock(mutex_);
- auto& thread_buffer = thread_buffers_[thread_id];
- if (thread_buffer.empty() && reader_->HasNext()) {
- reader_->ReadNext(&thread_buffer);
- }
- return !thread_buffer.empty();
+ reader_->ReadNext(out);
}
void ReInit() override {
- if (!unsafe_mode_) {
+ if (safe_mode_) {
PADDLE_THROW(
- "ThreadedReader::ReInit() is disabled when 'unsafe_mode' is false.");
+ "ThreadedReader::ReInit() is disabled when 'safe_mode' is true.");
}
VLOG(5) << "ThreadedReader::ReInit() is invoked! It might be buggy in "
"multi-thread environment.";
reader_->ReInit();
}
- ~ThreadedReader() {
- for (auto& p : thread_buffers_) {
- if (!p.second.empty()) {
- PADDLE_THROW(
- "Find an unused data batch in ThreadedReader! Maybe one thread "
- "invokes 'HasNext()' without subsequent 'ReadNext()'.");
- }
- }
- }
-
private:
- bool unsafe_mode_;
- mutable std::mutex mutex_;
- mutable std::unordered_map>
- thread_buffers_;
+ bool safe_mode_;
+ std::mutex mutex_;
};
class CreateThreadedReaderOp : public framework::OperatorBase {
@@ -98,8 +58,8 @@ class CreateThreadedReaderOp : public framework::OperatorBase {
}
const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader"))
->Get();
- bool unsafe_mode = Attr("unsafe_mode");
- out->Reset(new ThreadedReader(underlying_reader.Get(), unsafe_mode));
+ bool safe_mode = Attr("safe_mode");
+ out->Reset(new ThreadedReader(underlying_reader.Get(), safe_mode));
}
};
@@ -107,10 +67,9 @@ class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
public:
CreateThreadedReaderOpMaker(OpProto* op_proto, OpAttrChecker* op_checker)
: DecoratedReaderMakerBase(op_proto, op_checker) {
- AddAttr("unsafe_mode",
- "When 'unsafe_mode' is false, invoking 'HasNext()' or "
- "'ReInit()' is not allowed to avoid unexpected bugs in "
- "multi-thread environment.")
+ AddAttr("safe_mode",
+ "When 'safe_mode' is true, 'ReInit()' is disabled to avoid "
+ "unexpected bugs in multi-thread environment.")
.SetDefault(true);
AddComment(R"DOC(
CreateThreadedReader Operator
@@ -118,13 +77,9 @@ class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase {
This operator creates a threaded reader. A threaded reader's
'ReadNext()' can be invoked by several threads at the same
time.
- When the attribute 'unsafe_mode' is false, the threaded reader's
- 'HasNext()' and 'ReInit()' will be disabled to avoid unexpected
- bugs in multi-thread environment. If you really need them, you
- can enable them by setting 'unsafe_mode' true. In this case,
- 'HasNext()' returning true only guarantees the safety of
- invoking 'ReadNext()' in the same thread. Each thread must
- invoke 'HasNext()' and 'ReadNext()' in pairs.
+ When the attribute 'safe_mode' is true, the threaded reader's
+ 'ReInit()' is disabled to avoid unexpected bugs in multi-thread
+ environment.
)DOC");
}
};
diff --git a/paddle/fluid/operators/reader/open_files_op.cc b/paddle/fluid/operators/reader/open_files_op.cc
index 45db94e780..9ce2e5dc2c 100644
--- a/paddle/fluid/operators/reader/open_files_op.cc
+++ b/paddle/fluid/operators/reader/open_files_op.cc
@@ -30,12 +30,12 @@ class MultiFileReader : public framework::ReaderBase {
}
void ReadNext(std::vector* out) override;
- bool HasNext() const override;
void ReInit() override;
~MultiFileReader() { EndScheduler(); }
private:
+ bool HasNext();
void StartNewScheduler();
void EndScheduler();
void ScheduleThreadFunc();
@@ -52,16 +52,10 @@ class MultiFileReader : public framework::ReaderBase {
};
void MultiFileReader::ReadNext(std::vector* out) {
- if (!HasNext()) {
- PADDLE_THROW("There is no next data!");
+ out->clear();
+ if (HasNext()) {
+ buffer_->Receive(out);
}
- buffer_->Receive(out);
-}
-
-bool MultiFileReader::HasNext() const {
- while (!buffer_->IsClosed() && !buffer_->CanReceive()) {
- }
- return buffer_->CanReceive();
}
void MultiFileReader::ReInit() {
@@ -69,6 +63,12 @@ void MultiFileReader::ReInit() {
StartNewScheduler();
}
+bool MultiFileReader::HasNext() {
+ while (!buffer_->IsClosed() && !buffer_->CanReceive()) {
+ }
+ return buffer_->CanReceive();
+}
+
void MultiFileReader::StartNewScheduler() {
size_t thread_num = prefetchers_.size();
waiting_file_idx_ = framework::MakeChannel(file_names_.size());
@@ -140,9 +140,12 @@ void MultiFileReader::PrefetchThreadFunc(std::string file_name,
VLOG(5) << "The prefetch thread of file '" << file_name << "' starts.";
std::unique_ptr reader =
CreateReaderByFileName(file_name, dims_);
- while (reader->HasNext()) {
+ while (true) {
std::vector ins;
reader->ReadNext(&ins);
+ if (ins.empty()) {
+ break;
+ }
try {
buffer_->Send(&ins);
} catch (paddle::platform::EnforceNotMet e) {
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index bd8446df66..c7a5d1c714 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -252,7 +252,6 @@ All parameter, weight, gradient are variables in Paddle.
py::return_value_policy::reference);
py::class_(m, "Reader", "")
- .def("has_next", &framework::ReaderHolder::HasNext)
.def("reset", &framework::ReaderHolder::ReInit);
py::class_(m, "Scope", "")
diff --git a/paddle/fluid/pybind/recordio.cc b/paddle/fluid/pybind/recordio.cc
index 0644d91425..330d104e0a 100644
--- a/paddle/fluid/pybind/recordio.cc
+++ b/paddle/fluid/pybind/recordio.cc
@@ -39,7 +39,7 @@ class RecordIOWriter {
void CompleteAppendTensor() {
auto& ctx =
*platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
- framework::WriteToRecordIO(writer_, tensors_, ctx);
+ framework::WriteToRecordIO(&writer_, tensors_, ctx);
tensors_.clear();
}
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index d016ab9008..8ba6bd18e9 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -236,13 +236,9 @@ def monkey_patch_reader_methods(reader):
var = scope.find_var(reader.name)
return var.get_reader()
- def eof():
- return not __get_reader__().has_next()
-
def reset():
return __get_reader__().reset()
- reader.eof = eof
reader.reset = reset
reader.stop_gradient = True
reader.persistable = True
@@ -299,8 +295,7 @@ def open_recordio_file(filename,
shapes(list): List of tuples which declaring data shapes.
lod_levels(list): List of ints which declaring data lod_level.
dtypes(list): List of strs which declaring data type.
- pass_num(int): Number of passes to run. After completing the
- given number of passes, 'has_next()' will return False.
+ pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
@@ -377,8 +372,7 @@ def open_files(filenames,
dtypes(list): List of strs which declaring data type.
thread_num(int): The maximal concurrent prefetch thread number.
buffer_size(int): The size of prefetch buffer.
- pass_num(int): Number of passes to run. After completing the
- given number of passes, 'has_next()' will return False.
+ pass_num(int): Number of passes to run.
for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
From 7a7829466664aff8a41364d566b852ca5859bed2 Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Wed, 11 Apr 2018 01:37:24 +0800
Subject: [PATCH 35/62] Remove Readers' HasNext()
---
paddle/fluid/operators/reader/open_files_op.cc | 2 ++
1 file changed, 2 insertions(+)
diff --git a/paddle/fluid/operators/reader/open_files_op.cc b/paddle/fluid/operators/reader/open_files_op.cc
index 9ce2e5dc2c..779dc8a6a0 100644
--- a/paddle/fluid/operators/reader/open_files_op.cc
+++ b/paddle/fluid/operators/reader/open_files_op.cc
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include // NOLINT
+
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/operators/reader/reader_op_registry.h"
From a7c6bf771c493cc9031975ceabcb126ef9ed1188 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Wed, 11 Apr 2018 09:53:56 +0800
Subject: [PATCH 36/62] Change do_model_average_for_mean_and_var to boolean in
 batch_norm.
---
python/paddle/fluid/layers/nn.py | 3 ---
python/paddle/fluid/optimizer.py | 3 ++-
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 37ce738275..56c37f05cc 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1518,9 +1518,6 @@ def batch_norm(input,
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
- if do_model_average_for_mean_and_var:
- do_model_average_for_mean_and_var = None
-
mean = helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name,
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 1917b7d044..36503cac6d 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -853,7 +853,8 @@ class ModelAverage(Optimizer):
self.params_grads = [] if params_grads is None else params_grads
params = {}
for param, grad in self.params_grads:
- params[param.name] = (param, grad)
+ if param.do_model_average != False:
+ params[param.name] = (param, grad)
for param in framework.default_main_program().global_block(
).all_parameters():
if param.name not in params and param.do_model_average != False:
From 72b5de05fee1c94c7bd40c8b69ff8c4fe2aff7d9 Mon Sep 17 00:00:00 2001
From: JiayiFeng
Date: Wed, 11 Apr 2018 02:54:56 +0000
Subject: [PATCH 37/62] Update unittest
---
paddle/fluid/operators/read_op.cc | 1 +
.../tests/unittests/test_parallel_executor.py | 18 ++++++++++++------
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc
index 4496110cf8..bf02b99589 100644
--- a/paddle/fluid/operators/read_op.cc
+++ b/paddle/fluid/operators/read_op.cc
@@ -66,6 +66,7 @@ class ReadOp : public framework::OperatorBase {
std::vector out_arg_names = Outputs("Out");
std::vector ins;
reader->ReadNext(&ins);
+ PADDLE_ENFORCE(!ins.empty(), "There is no next data.");
PADDLE_ENFORCE_EQ(ins.size(), out_arg_names.size());
for (size_t i = 0; i < ins.size(); ++i) {
auto* out =
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
index 8401716db8..3c00f708f0 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
@@ -26,11 +26,14 @@ def simple_fc_net(use_feed):
img = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
else:
- reader = fluid.layers.open_recordio_file(
- filename='./mnist.recordio',
+ reader = fluid.layers.open_files(
+ filenames=['./mnist.recordio'],
shapes=[[-1, 784], [-1, 1]],
lod_levels=[0, 0],
- dtypes=['float32', 'int64'])
+ dtypes=['float32', 'int64'],
+ thread_num=1,
+ for_parallel=True)
+ reader = fluid.layers.io.double_buffer(reader)
img, label = fluid.layers.read_file(reader)
hidden = img
for _ in xrange(4):
@@ -51,11 +54,14 @@ def fc_with_batchnorm(use_feed):
img = fluid.layers.data(name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
else:
- reader = fluid.layers.open_recordio_file(
- filename='./mnist.recordio',
+ reader = fluid.layers.open_files(
+ filenames=['mnist.recordio'],
shapes=[[-1, 784], [-1, 1]],
lod_levels=[0, 0],
- dtypes=['float32', 'int64'])
+ dtypes=['float32', 'int64'],
+ thread_num=1,
+ for_parallel=True)
+ reader = fluid.layers.io.double_buffer(reader)
img, label = fluid.layers.read_file(reader)
hidden = img
From 129859e732fa7ac056c4c453619b2c84c98bc0ac Mon Sep 17 00:00:00 2001
From: qingqing01
Date: Wed, 11 Apr 2018 12:34:46 +0800
Subject: [PATCH 38/62] Support data type int64 in NCCL. (#9818)
---
paddle/fluid/platform/nccl_helper.h | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h
index 2999004320..3a2a423486 100644
--- a/paddle/fluid/platform/nccl_helper.h
+++ b/paddle/fluid/platform/nccl_helper.h
@@ -14,8 +14,9 @@
#pragma once
-#include
+#include // NOLINT
#include
+#include
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h"
@@ -29,6 +30,8 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) {
return ncclDouble;
} else if (type == typeid(int)) { // NOLINT
return ncclInt;
+ } else if (type == typeid(int64_t)) { // NOLINT
+ return ncclInt64;
} else {
PADDLE_THROW("Not supported");
}
@@ -66,23 +69,23 @@ struct NCCLContext {
return boost::get(ctx_->GetPlace()).device;
}
- static void InitNCCLContext(std::unordered_map &contexts,
+ static void InitNCCLContext(std::unordered_map *contexts,
const std::vector &places) {
std::vector