From 056fdedde3bb7f17743af49217ee757251a407e3 Mon Sep 17 00:00:00 2001 From: chengduo <30176695+chengduoZH@users.noreply.github.com> Date: Fri, 13 Sep 2019 19:32:49 +0800 Subject: [PATCH] Open fuse all reduce option (#19765) * Open fuse all reduce op test=develop * Add Fuse optimization op log * Add log in fuse_optimizer op pass and fuse all_reduce op pass * replace with boost::optional test=develop * Polish code test=develop * fix code coverage test=develop --- .../fluid/framework/details/build_strategy.cc | 47 +++++++-- .../fluid/framework/details/build_strategy.h | 6 +- .../fuse_adam_op_pass.cc | 6 +- .../fuse_momentum_op_pass.cc | 2 +- .../fuse_optimizer_op_pass.cc | 9 +- .../fuse_sgd_op_pass.cc | 2 +- .../fuse_all_reduce_op_pass.cc | 9 +- .../multi_devices_graph_pass.cc | 4 +- .../multi_devices_graph_pass.h | 2 +- paddle/fluid/pybind/pybind.cc | 95 +++++++++++-------- 10 files changed, 118 insertions(+), 64 deletions(-) diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index 216fb66c03..e4b0aa21a0 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -43,6 +43,12 @@ static inline bool SeqOnlyAllReduceOps(const BuildStrategy &strategy) { !strategy.enable_parallel_graph_; } +static inline void ConvertDefaultValue(boost::optional<bool> *default_value) { + if (*default_value == boost::none) { + *default_value = true; + } +} + class ParallelExecutorPassBuilder : public ir::PassBuilder { public: explicit ParallelExecutorPassBuilder(const BuildStrategy &strategy) @@ -79,39 +85,55 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { void ResolveOptionConfliction() { // Specifies the restrictions between different pass. 
if (strategy_.enable_parallel_graph_) { - VLOG_IF(3, strategy_.fuse_all_optimizer_ops_) + LOG_IF(WARNING, strategy_.fuse_all_optimizer_ops_ == true) << "Currently, fuse_all_optimizer_ops doesn't work under " "parallel_graph."; strategy_.fuse_all_optimizer_ops_ = false; - VLOG_IF(3, strategy_.fuse_all_reduce_ops_) + LOG_IF(WARNING, strategy_.fuse_all_reduce_ops_ == true) << "fuse_all_reduce_ops doesn't work under " "parallel_graph."; strategy_.fuse_all_reduce_ops_ = false; } if (strategy_.is_distribution_) { - VLOG_IF(3, strategy_.fuse_all_optimizer_ops_) + LOG_IF(WARNING, strategy_.fuse_all_optimizer_ops_ == true) << "Currently, fuse_all_optimizer_ops only works under " "Non-distributed mode."; strategy_.fuse_all_optimizer_ops_ = false; - VLOG_IF(3, strategy_.fuse_all_reduce_ops_) + LOG_IF(WARNING, strategy_.fuse_all_reduce_ops_ == true) << "Currently, fuse_all_reduce_ops_ only works under " "Non-distributed mode."; strategy_.fuse_all_reduce_ops_ = false; } if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) { - VLOG_IF(3, strategy_.fuse_all_optimizer_ops_) + LOG_IF(WARNING, strategy_.fuse_all_optimizer_ops_ == true) << "Currently, fuse_all_optimizer_ops only works under AllReduce " "mode."; strategy_.fuse_all_optimizer_ops_ = false; - VLOG_IF(3, strategy_.fuse_all_reduce_ops_) + LOG_IF(WARNING, strategy_.fuse_all_reduce_ops_ == true) << "fuse_all_optimizer_ops only work in Reducer mode."; strategy_.fuse_all_reduce_ops_ = false; } - if (strategy_.async_mode_) { - VLOG_IF(3, strategy_.fuse_all_optimizer_ops_) + if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) { + LOG_IF(WARNING, strategy_.fuse_broadcast_ops_ == true) + << "Currently, fuse_broadcast_ops only works under Reduce " + "mode."; + strategy_.fuse_broadcast_ops_ = false; + } + + ConvertDefaultValue(&strategy_.fuse_all_optimizer_ops_); + ConvertDefaultValue(&strategy_.fuse_all_reduce_ops_); + ConvertDefaultValue(&strategy_.fuse_broadcast_ops_); + + if 
(strategy_.fuse_all_optimizer_ops_ == true) { + LOG_IF(WARNING, strategy_.async_mode_) << "Currently, fuse_all_optimizer_ops doesn't work under " "async mode."; - strategy_.fuse_all_optimizer_ops_ = false; + strategy_.fuse_all_optimizer_ops_ = !strategy_.async_mode_; + } + if (strategy_.fuse_all_reduce_ops_ == true) { + LOG_IF(WARNING, strategy_.async_mode_) + << "fuse_all_optimizer_ops only work in Reducer mode."; + strategy_.fuse_all_reduce_ops_ = !strategy_.async_mode_; } } @@ -151,7 +173,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { // NOTE: fuse_all_xx_ops will count the number of xx operator first, // if the number is zero, fuse_all_reduce_ops will do nothing. // Currently, only one type of optimization algorithm can be fused. - if (strategy_.fuse_all_optimizer_ops_) { + if (strategy_.fuse_all_optimizer_ops_ == true) { AppendPass("fuse_adam_op_pass"); AppendPass("fuse_sgd_op_pass"); AppendPass("fuse_momentum_op_pass"); @@ -207,6 +229,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { } } + void AppendPassWithCheck(const boost::optional<bool> &append_pass, + const std::string &pass_name) { + AppendPassWithCheck(append_pass == true, pass_name); + } + void AppendPassWithCheck(bool append_pass, const std::string &pass_name) { if (append_pass) { AppendPass(pass_name); diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 47409b89bc..5f0cc4b215 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -89,8 +89,8 @@ struct BuildStrategy { bool fuse_elewise_add_act_ops_{false}; // Fuse_all_optimizer_ops and fuse_all_reduce_ops require that gradients // should not be sparse types - bool fuse_all_optimizer_ops_{true}; - bool fuse_all_reduce_ops_{false}; + boost::optional<bool> fuse_all_optimizer_ops_{boost::none}; + boost::optional<bool> fuse_all_reduce_ops_{boost::none}; // fuse_relu_depthwise_conv can fuse the `relu -> // 
depthwise_conv` bool fuse_relu_depthwise_conv_{false}; @@ -98,7 +98,7 @@ struct BuildStrategy { // faster. Because fusing broadcast OP equals delaying the execution of all // broadcast Ops, in this case, all nccl streams are used only for reduce // operations for a period of time. - bool fuse_broadcast_ops_{true}; + boost::optional<bool> fuse_broadcast_ops_{boost::none}; // replace batch_norm with sync_batch_norm. bool sync_batch_norm_{false}; diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc index 997f18d709..8aec098720 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc @@ -124,7 +124,7 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { // NOTE: fused_var is only exist in scope, so the graph doesn't have // fused_var node. - VLOG(7) << "Insert adam to graph "; + VLOG(6) << "Insert adam to graph "; OpDesc adam_desc(adam_ops[0]->Op()->Block()); adam_desc.SetType("adam"); adam_desc.SetInput(kParam, {fused_vars_name.at(kParam)}); @@ -180,7 +180,7 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { scale_ops.emplace_back(*scale_op_iter); } PADDLE_ENFORCE_EQ(scale_ops.size(), beta_name.size()); - VLOG(7) << "The number of scale op is " << scale_ops.size() << "."; + VLOG(6) << "The number of scale op is " << scale_ops.size() << "."; // Check attributions // NOTE: If new attribution is added, the following code maybe need change. int op_role = boost::get<int>( @@ -205,7 +205,7 @@ class FuseAdamOpPass : public FuseOptimizerOpPass { // NOTE: fused_var is only exist in scope, so the graph doesn't have // fused_var node. 
- VLOG(7) << "Insert fused scale to graph."; + VLOG(6) << "Insert fused scale to graph."; OpDesc scale_desc(scale_ops[0]->Op()->Block()); scale_desc.SetType("scale"); scale_desc.SetInput("X", {fused_var_name}); diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc index ef958c7364..8f3a623a98 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc @@ -61,7 +61,7 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass { // NOTE: fused_var is only exist in scope, so the graph doesn't have // fused_var node. - VLOG(7) << "Insert momentum to graph "; + VLOG(6) << "Insert momentum to graph "; OpDesc momentum_desc(momentum_ops[0]->Op()->Block()); momentum_desc.SetType("momentum"); momentum_desc.SetInput(kParam, {fused_vars_name.at(kParam)}); diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc index 74e2f93d9f..fcb5604a07 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc @@ -49,7 +49,7 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { } VLOG(6) << "Find " << fuse_op_type << " operators : " << opt_ops_num - << ", and " << opt_nodes.size() << " for dense gradients "; + << ", and " << opt_nodes.size() << " for dense gradients."; if (opt_nodes.size() == 0 || result.Has(details::kFusedOptType)) { if (result.Has(details::kFusedOptType)) { auto &opt_type = @@ -69,6 +69,11 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { return; } + LOG(WARNING) << "Find " << fuse_op_type << " operators : " << opt_ops_num + << ", and " << opt_nodes.size() << " for dense gradients. 
" + << "To make the speed faster, those optimization are fused " + "during training."; + result.Set(details::kFusedOptType, new details::FusedOptType); result.Get(details::kFusedOptType) = fuse_op_type; if (!result.Has(details::kProgramDescs)) { @@ -149,7 +154,7 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { &opt_nodes); grad_fused = true; } else { - VLOG(10) << "The number of new gradients is " << new_grad_idx.size(); + VLOG(6) << "The number of new gradients is " << new_grad_idx.size(); if (new_grad_idx.size() == 1) return; // NOTE(zcd): If the gradients of backward stage and optimization stage // have diff, Only take care of the the gradient of optimization stage. diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc index b202a6506d..3dd54cbc3c 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc @@ -42,7 +42,7 @@ class FuseSgdOpPass : public FuseOptimizerOpPass { int op_role = boost::get( sgd_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())); - VLOG(7) << "Insert sgd to graph "; + VLOG(6) << "Insert sgd to graph."; // Add fused scale OpDesc Sgd_desc(sgd_ops[0]->Op()->Block()); Sgd_desc.SetType("sgd"); diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc index a7815b71e4..73d7bf6dba 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc @@ -56,7 +56,7 @@ class FuseAllReduceOpPass : public ir::Pass { std::unordered_map all_reduce_ops = GetAllReduceOps(result, places, grads); - VLOG(10) << "Find all_reduce_ops: " << all_reduce_ops.size(); + VLOG(6) << "Find all_reduce_ops: " << 
all_reduce_ops.size(); if (all_reduce_ops.size() == 0) { return; } @@ -65,11 +65,16 @@ class FuseAllReduceOpPass : public ir::Pass { "The number of all_reduce OpHandle is not equal to the " "number of grads. Maybe some gradients are sparse type, " "it is not supported currently."); - VLOG(10) << "Insert fused_all_reduce"; auto &group_params_grads = graph->Get( details::kGroupParamsAndDenseGrads); + LOG(WARNING) << string::Sprintf( + "Find all_reduce operators: %d. To make the speed faster, some " + "all_reduce ops are fused during training, after fusion, " + "the number of all_reduce ops is %d.", + all_reduce_ops.size(), group_params_grads.size()); + for (auto &group_p_g : group_params_grads) { size_t group_size = group_p_g.size(); PADDLE_ENFORCE_GT(group_size, static_cast(0)); diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index 304446f82d..224ab21b47 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -699,7 +699,7 @@ bool ReduceSSAGraphBuilder::DealWithSpecialOp(ir::Graph *result, void ReduceSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const { if (UseGPU()) { - if (strategy_.fuse_broadcast_ops_) { + if (strategy_.fuse_broadcast_ops_ == true) { CreateFusedBroadcastOp(result, bcast_var_name_set_); } else { for (size_t dev_id = 0; dev_id < bcast_var_name_set_.size(); ++dev_id) { @@ -1068,7 +1068,7 @@ void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const { strategy_.reduce_ == details::BuildStrategy::ReduceStrategy::kReduce) { return; } - if (strategy_.fuse_broadcast_ops_) { + if (strategy_.fuse_broadcast_ops_ == true) { CreateFusedBroadcastOp(result, bcast_var_name_set_); } else { for (size_t dev_id = 0; dev_id < bcast_var_name_set_.size(); ++dev_id) { diff --git 
a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h index 1f2eed05fe..ea0455b6a8 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h @@ -123,7 +123,7 @@ class AsyncSSAGraphBuilder : public MultiDevSSAGraphBuilderBase { const std::string &g_name) const override {} bool NeedCollectiveForGrad(const std::string &grad_name, - std::vector ops) const { + std::vector ops) const override { return false; } diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index ad9501af6b..768839ec11 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -338,8 +338,8 @@ PYBIND11_MODULE(core_noavx, m) { recursive_sequence_lengths.end(), std::back_inserter(new_lod)); LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); - PADDLE_ENFORCE( - CheckLoD(new_offset_lod, -1), + PADDLE_ENFORCE_EQ( + CheckLoD(new_offset_lod, -1), true, "the provided recursive_sequence_lengths info is invalid"); new (&instance) LoDTensor(new_offset_lod); }) @@ -355,8 +355,9 @@ PYBIND11_MODULE(core_noavx, m) { LoD new_lod; new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()), - "the provided lod info is invalid"); + PADDLE_ENFORCE_EQ( + CheckLoD(new_lod, vectorize(self.dims()).front()), true, + "the provided lod info is invalid"); self.set_lod(new_lod); }, py::arg("lod"), R"DOC( @@ -386,8 +387,8 @@ PYBIND11_MODULE(core_noavx, m) { recursive_sequence_lengths.end(), std::back_inserter(new_lod)); LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); - PADDLE_ENFORCE( - CheckLoD(new_offset_lod, vectorize(self.dims()).front()), + PADDLE_ENFORCE_EQ( + CheckLoD(new_offset_lod, vectorize(self.dims()).front()), true, "the provided 
recursive_sequence_lengths info is invalid"); self.set_lod(new_offset_lod); }, @@ -588,7 +589,7 @@ All parameter, weight, gradient are variables in Paddle. #endif .def("get_reader", [](Variable &self) -> framework::ReaderHolder * { - PADDLE_ENFORCE(self.IsType()); + PADDLE_ENFORCE_EQ(self.IsType(), true); return self.GetMutable(); }, py::return_value_policy::reference); @@ -713,8 +714,8 @@ All parameter, weight, gradient are variables in Paddle. auto &info = iter.second; if (info.HasOpProtoAndChecker()) { std::string str; - PADDLE_ENFORCE( - info.Proto().SerializeToString(&str), + PADDLE_ENFORCE_EQ( + info.Proto().SerializeToString(&str), true, "Serialize OpProto Error. This could be a bug of Paddle."); ret_values.emplace_back(str); } @@ -942,16 +943,17 @@ All parameter, weight, gradient are variables in Paddle. }); py::class_(m, "Operator") - .def_static("create", - [](py::bytes protobin) { - proto::OpDesc desc; - PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), - "Cannot parse user input to OpDesc"); - PADDLE_ENFORCE(desc.IsInitialized(), - "User OpDesc is not initialized, reason %s", - desc.InitializationErrorString()); - return OpRegistry::CreateOp(desc); - }) + .def_static( + "create", + [](py::bytes protobin) { + proto::OpDesc desc; + PADDLE_ENFORCE_EQ(desc.ParsePartialFromString(protobin), true, + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE_EQ(desc.IsInitialized(), true, + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + return OpRegistry::CreateOp(desc); + }) .def("run", [](OperatorBase &self, const Scope &scope, const platform::CPUPlace &place) { self.Run(scope, place); }) @@ -1323,7 +1325,8 @@ All parameter, weight, gradient are variables in Paddle. 
"reduce_strategy", [](const BuildStrategy &self) { return self.reduce_; }, [](BuildStrategy &self, BuildStrategy::ReduceStrategy strategy) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.reduce_ = strategy; }, R"DOC(The type is fluid.BuildStrategy.ReduceStrategy, there are two reduce @@ -1346,7 +1349,8 @@ All parameter, weight, gradient are variables in Paddle. [](const BuildStrategy &self) { return self.gradient_scale_; }, [](BuildStrategy &self, BuildStrategy::GradientScaleStrategy strategy) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finalized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finalized."); self.gradient_scale_ = strategy; }, R"DOC(The type is fluid.BuildStrategy.GradientScaleStrategy, there are three @@ -1407,7 +1411,8 @@ All parameter, weight, gradient are variables in Paddle. "debug_graphviz_path", [](const BuildStrategy &self) { return self.debug_graphviz_path_; }, [](BuildStrategy &self, const std::string &path) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.debug_graphviz_path_ = path; }, R"DOC(The type is STR, debug_graphviz_path indicates the path that @@ -1428,7 +1433,8 @@ All parameter, weight, gradient are variables in Paddle. return self.enable_sequential_execution_; }, [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.enable_sequential_execution_ = b; }, R"DOC(The type is BOOL. If set True, the execution order of ops would @@ -1447,7 +1453,8 @@ All parameter, weight, gradient are variables in Paddle. 
return self.remove_unnecessary_lock_; }, [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.remove_unnecessary_lock_ = b; }, R"DOC(The type is BOOL. If set True, some locks in GPU ops would be @@ -1508,7 +1515,8 @@ All parameter, weight, gradient are variables in Paddle. return self.fuse_elewise_add_act_ops_; }, [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.fuse_elewise_add_act_ops_ = b; }, R"DOC(The type is BOOL, fuse_elewise_add_act_ops indicate whether @@ -1528,7 +1536,8 @@ All parameter, weight, gradient are variables in Paddle. return self.fuse_relu_depthwise_conv_; }, [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.fuse_relu_depthwise_conv_ = b; }, R"DOC(The type is BOOL, fuse_relu_depthwise_conv indicate whether @@ -1544,14 +1553,17 @@ All parameter, weight, gradient are variables in Paddle. 
build_strategy = fluid.BuildStrategy() build_strategy.fuse_relu_depthwise_conv = True )DOC") - .def_property( - "fuse_broadcast_ops", - [](const BuildStrategy &self) { return self.fuse_broadcast_ops_; }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); - self.fuse_broadcast_ops_ = b; - }, - R"DOC(The type is BOOL, fuse_broadcast_op indicates whether + .def_property("fuse_broadcast_ops", + [](const BuildStrategy &self) { + return self.fuse_broadcast_ops_ == true || + self.fuse_broadcast_ops_ == boost::none; + }, + [](BuildStrategy &self, bool b) { + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); + self.fuse_broadcast_ops_ = b; + }, + R"DOC(The type is BOOL, fuse_broadcast_op indicates whether to fuse the broadcast ops. Note that, in Reduce mode, fusing broadcast ops may make the program faster. Because fusing broadcast OP equals delaying the execution of all @@ -1559,18 +1571,20 @@ All parameter, weight, gradient are variables in Paddle. for NCCLReduce operations for a period of time. 
Default False.)DOC") .def_property("fuse_all_optimizer_ops", [](const BuildStrategy &self) { - return self.fuse_all_optimizer_ops_; + return self.fuse_all_optimizer_ops_ == true || + self.fuse_all_optimizer_ops_ == boost::none; }, [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE(!self.IsFinalized(), - "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.fuse_all_optimizer_ops_ = b; }) .def_property( "sync_batch_norm", [](const BuildStrategy &self) { return self.sync_batch_norm_; }, [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); + PADDLE_ENFORCE_EQ(!self.IsFinalized(), true, + "BuildStrategy is finlaized."); self.sync_batch_norm_ = b; }, R"DOC(The type is BOOL, sync_batch_norm indicates whether to use @@ -1637,7 +1651,10 @@ All parameter, weight, gradient are variables in Paddle. [](BuildStrategy &self, bool b) { self.enable_inplace_ = b; }) .def_property( "fuse_all_reduce_ops", - [](const BuildStrategy &self) { return self.fuse_all_reduce_ops_; }, + [](const BuildStrategy &self) { + return self.fuse_all_reduce_ops_ == true || + self.fuse_all_reduce_ops_ == boost::none; + }, [](BuildStrategy &self, bool b) { self.fuse_all_reduce_ops_ = b; }) .def_property("enable_backward_optimizer_op_deps", [](const BuildStrategy &self) {