@@ -1334,14 +1334,9 @@ All parameter, weight, gradient are variables in Paddle.
     Examples:
         .. code-block:: python

-          build_strategy = fluid.BuildStrategy()
-          build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
-
-          train_exe = fluid.ParallelExecutor(use_cuda=True,
-                                             loss_name=loss.name,
-                                             build_strategy=build_strategy)
-
-          train_loss, = train_exe.run([loss.name], feed=feed_dict)
+            import paddle.fluid as fluid
+            build_strategy = fluid.BuildStrategy()
+            build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
 )DOC");

   py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy")
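For context, a minimal end-to-end sketch of where the new snippet fits, assuming the fluid 1.x `ParallelExecutor` API; the toy fc network, the `CPU_NUM` setting, and the feed names are illustrative, not from the patch.

.. code-block:: python

    import os
    import numpy
    import paddle.fluid as fluid

    os.environ['CPU_NUM'] = '2'  # ParallelExecutor on CPU reads this env var

    # Toy program: one fc layer and a mean loss (illustrative only).
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    build_strategy = fluid.BuildStrategy()
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    train_exe = fluid.ParallelExecutor(use_cuda=False,
                                       loss_name=loss.name,
                                       build_strategy=build_strategy)
    feed = {'x': numpy.random.random((8, 13)).astype('float32')}
    train_loss, = train_exe.run([loss.name], feed=feed)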
@@ -1363,11 +1358,19 @@ All parameter, weight, gradient are variables in Paddle.
             self.reduce_ = strategy;
           },
-          R"DOC(The type is STR, there are two reduce strategies in ParallelExecutor,
-                'AllReduce' and 'Reduce'. If you want that all the parameters'
-                optimization are done on all devices independently, you should choose 'AllReduce';
-                if you choose 'Reduce', all the parameters' optimization will be evenly distributed
-                to different devices, and then broadcast the optimized parameter to other devices.
-                In some models, `Reduce` is faster. Default 'AllReduce'. )DOC")
+          R"DOC(The type is fluid.BuildStrategy.ReduceStrategy, there are two reduce
+                strategies in ParallelExecutor, 'AllReduce' and 'Reduce'. If you want all
+                the parameters' optimization to be done on all devices independently, choose
+                'AllReduce'; if you choose 'Reduce', the parameters' optimization will be
+                evenly distributed across devices, and the optimized parameters will then be
+                broadcast to the other devices. In some models, `Reduce` is faster. Default 'AllReduce'.
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
+                )DOC")
       .def_property(
           "gradient_scale_strategy",
           [](const BuildStrategy &self) { return self.gradient_scale_; },
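On choosing between the two values: 'AllReduce' all-reduces the gradients and lets every device run the full optimizer; 'Reduce' shards the optimizer work across devices and broadcasts the updated parameters. A short sketch, assuming the fluid 1.x enum:

.. code-block:: python

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    # Default: every device optimizes all parameters after the
    # gradients are all-reduced.
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
    # Alternative: shard parameter optimization across devices, then
    # broadcast the updated parameters; faster for some models.
    build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce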
@@ -1377,10 +1380,18 @@ All parameter, weight, gradient are variables in Paddle.
             self.gradient_scale_ = strategy;
           },
-          R"DOC(The type is STR, there are three ways of defining :math:`loss@grad` in
-                ParallelExecutor, 'CoeffNumDevice', 'One' and 'Customized'. By default,
-                ParallelExecutor sets the :math:`loss@grad` according to the number of devices.
-                If you want to customize :math:`loss@grad`, you can choose 'Customized'.
-                Default 'CoeffNumDevice'.)DOC")
+          R"DOC(The type is fluid.BuildStrategy.GradientScaleStrategy, there are three
+                ways of defining :math:`loss@grad` in ParallelExecutor: 'CoeffNumDevice',
+                'One' and 'Customized'. By default, ParallelExecutor sets :math:`loss@grad`
+                according to the number of devices. If you want to customize :math:`loss@grad`,
+                choose 'Customized'. Default 'CoeffNumDevice'.
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.gradient_scale_strategy = \
+                            fluid.BuildStrategy.GradientScaleStrategy.Customized
+                )DOC")
       .def_property(
           "debug_graphviz_path",
           [](const BuildStrategy &self) { return self.debug_graphviz_path_; },
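The docstring names 'Customized' without showing how the custom :math:`loss@grad` is supplied. A sketch of the three choices, assuming the fluid 1.x enum; the "<loss_name>@GRAD" feed-key convention mentioned in the comment is an assumption here, not part of the patch.

.. code-block:: python

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    # 'CoeffNumDevice' (default): loss@grad is set from the device count.
    # 'One':                      loss@grad is set to 1.
    # 'Customized':               loss@grad is not set automatically; you
    #     feed it yourself, conventionally under the "<loss_name>@GRAD"
    #     key of the run() feed dict (assumed convention).
    build_strategy.gradient_scale_strategy = \
        fluid.BuildStrategy.GradientScaleStrategy.Customized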
@@ -1389,8 +1400,16 @@ All parameter, weight, gradient are variables in Paddle.
             self.debug_graphviz_path_ = path;
           },
-          R"DOC(The type is STR, debug_graphviz_path indicate the path that
-                writing the SSA Graph to file in the form of graphviz, you.
-                It is useful for debugging. Default "")DOC")
+          R"DOC(The type is STR, debug_graphviz_path indicates the path to which
+                the SSA Graph will be written in graphviz format.
+                It is useful for debugging. Default ""
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.debug_graphviz_path = "./graph"
+                )DOC")
       .def_property(
           "enable_sequential_execution",
           [](const BuildStrategy &self) {
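A sketch of the debugging workflow this option enables; the path value and the `dot` rendering step are illustrative, assuming graphviz is installed.

.. code-block:: python

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    # Dump the SSA graph in graphviz format while building the program.
    build_strategy.debug_graphviz_path = "./graph"
    # Render the dumped .dot files afterwards, e.g.:
    #   dot -Tpng <dumped_file>.dot -o graph.png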
@@ -1400,7 +1419,15 @@ All parameter, weight, gradient are variables in Paddle.
             PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finalized.");
             self.enable_sequential_execution_ = b;
           },
-          R"DOC(The type is BOOL. If set True, the execution order of ops would be the same as what is in the program. Default False.)DOC")
+          R"DOC(The type is BOOL. If set True, the execution order of ops would be the same as what is in the program. Default False.
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.enable_sequential_execution = True
+                )DOC")
       .def_property(
           "remove_unnecessary_lock",
           [](const BuildStrategy &self) {
@@ -1410,7 +1437,15 @@ All parameter, weight, gradient are variables in Paddle.
             PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finalized.");
             self.remove_unnecessary_lock_ = b;
           },
-          R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default True.)DOC")
+          R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default True.
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.remove_unnecessary_lock = True
+                )DOC")
       .def_property(
           "num_trainers",
           [](const BuildStrategy &self) { return self.num_trainers_; },
@@ -1439,8 +1474,16 @@ All parameter, weight, gradient are variables in Paddle.
             self.fuse_elewise_add_act_ops_ = b;
           },
-          R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicate whether
-                to fuse elementwise_add_op and activation_op,
-                it may make the execution faster. Default False)DOC")
+          R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicates whether
+                to fuse elementwise_add_op and activation_op;
+                it may make the execution faster. Default False
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.fuse_elewise_add_act_ops = True
+                )DOC")
       .def_property(
           "fuse_relu_depthwise_conv",
           [](const BuildStrategy &self) {
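The pass targets an `elementwise_add` op followed by an activation op. A sketch of a residual-style block containing that pattern, assuming fluid 1.x layers; the network itself is illustrative.

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[64], dtype='float32')
    y = fluid.layers.fc(input=x, size=64)
    # elementwise_add followed by relu: the pair the pass can fuse.
    out = fluid.layers.relu(fluid.layers.elementwise_add(x, y))

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_elewise_add_act_ops = True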
@@ -1451,10 +1494,18 @@ All parameter, weight, gradient are variables in Paddle.
             self.fuse_relu_depthwise_conv_ = b;
           },
-          R"DOC(The type is BOOL, fuse_relu_depthwise_conv indicate whether
-                to fuse relu and depthwise_conv2d,
-                it will save GPU memory and may make the execution faster.
-                This options is only available in GPU devices.
-                Default False.)DOC")
+          R"DOC(The type is BOOL, fuse_relu_depthwise_conv indicates whether
+                to fuse relu and depthwise_conv2d;
+                it will save GPU memory and may make the execution faster.
+                This option is only available on GPU devices.
+                Default False.
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.fuse_relu_depthwise_conv = True
+                )DOC")
       .def_property(
           "fuse_broadcast_ops",
           [](const BuildStrategy &self) { return self.fuse_broadcast_ops_; },
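In fluid, a depthwise convolution is a `conv2d` whose `groups` equals the number of input channels. A sketch of the relu -> depthwise_conv2d pattern this option fuses; the shapes are illustrative.

.. code-block:: python

    import paddle.fluid as fluid

    # NCHW input with 32 channels (illustrative shape).
    x = fluid.layers.data(name='x', shape=[32, 28, 28], dtype='float32')
    act = fluid.layers.relu(x)
    # groups == input channels makes this a depthwise convolution.
    out = fluid.layers.conv2d(input=act, num_filters=32,
                              filter_size=3, groups=32)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_relu_depthwise_conv = True  # GPU-only option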
@@ -1491,7 +1542,15 @@ All parameter, weight, gradient are variables in Paddle.
                 Current implementation doesn't support FP16 training and CPU.
                 Synchronization is only performed within a single machine, not across machines.

-                Default False)DOC")
+                Default False
+
+                Examples:
+                    .. code-block:: python
+
+                        import paddle.fluid as fluid
+                        build_strategy = fluid.BuildStrategy()
+                        build_strategy.sync_batch_norm = True
+                )DOC")
       .def_property(
           "memory_optimize",
           [](const BuildStrategy &self) { return self.memory_optimize_; },
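sync_batch_norm matters when the per-GPU batch is small, since plain batch_norm then computes statistics over too few samples. A sketch pairing a batch_norm layer with the flag, assuming the fluid 1.x API and a single multi-GPU machine (per the docstring, no FP16 or CPU support); the network is illustrative.

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[3, 224, 224], dtype='float32')
    conv = fluid.layers.conv2d(input=x, num_filters=16, filter_size=3)
    loss = fluid.layers.mean(fluid.layers.batch_norm(input=conv))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    build_strategy = fluid.BuildStrategy()
    # Aggregate batch-norm statistics across all GPUs on this machine
    # rather than per device.
    build_strategy.sync_batch_norm = True

    train_exe = fluid.ParallelExecutor(use_cuda=True,
                                       loss_name=loss.name,
                                       build_strategy=build_strategy)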