From 8ee837255ee0cb8a35cec19a64d0833b174f9b63 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Mon, 23 Apr 2018 11:25:59 +0800
Subject: [PATCH 1/4] fix send op handle local scope

---
 paddle/fluid/framework/details/send_op_handle.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
index 549b9d9abb..84e1f28b61 100644
--- a/paddle/fluid/framework/details/send_op_handle.cc
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -34,7 +34,9 @@ void SendOpHandle::RunImpl() {
     }
     in->generated_op_->Wait(dev_ctxes_[p]);
   }
-  this->RunAndRecordEvent([&] { op_->Run(*local_scope_, place_); });
+  auto &tmp_scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
+  // auto &lod_tensor = tmp_scope->FindVar(var_name)->Get<LoDTensor>();
+  this->RunAndRecordEvent([&] { op_->Run(*tmp_scope, place_); });
 }
 
 std::string SendOpHandle::Name() const { return "send"; }

From 2b06b4b4e998040e5047bde53216033f173b46a3 Mon Sep 17 00:00:00 2001
From: typhoonzero
Date: Mon, 23 Apr 2018 11:57:54 +0800
Subject: [PATCH 2/4] update send op handle to follow up parallel executor
 changes

---
 paddle/fluid/framework/details/send_op_handle.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
index 84e1f28b61..0763f92171 100644
--- a/paddle/fluid/framework/details/send_op_handle.cc
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -35,8 +35,9 @@ void SendOpHandle::RunImpl() {
     in->generated_op_->Wait(dev_ctxes_[p]);
   }
   auto &tmp_scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
-  // auto &lod_tensor = tmp_scope->FindVar(var_name)->Get<LoDTensor>();
-  this->RunAndRecordEvent([&] { op_->Run(*tmp_scope, place_); });
+  // FIXME(wuyi): cannot use RunAndRecordEvent here, for it will cause a
+  // deadlock.
+  op_->Run(*tmp_scope, place_);
 }
 
 std::string SendOpHandle::Name() const { return "send"; }

From 7a395881d42017dd7ee32bcfa1e744708ed64c3c Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 23 Apr 2018 14:29:24 +0800
Subject: [PATCH 3/4] Add customize_loss_grad option to PE

---
 .../framework/details/multi_devices_graph_builder.cc |  9 ++++++---
 .../framework/details/multi_devices_graph_builder.h  |  3 +++
 paddle/fluid/framework/parallel_executor.cc          | 12 +++++++-----
 paddle/fluid/framework/parallel_executor.h           |  2 +-
 paddle/fluid/pybind/pybind.cc                        | 10 +++++-----
 python/paddle/fluid/parallel_executor.py             |  6 ++++--
 6 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index 002952436e..f27f184310 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -34,7 +34,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
     const std::unordered_set<std::string> &params,
-    const std::vector<Scope *> &local_scopes,
+    const std::vector<Scope *> &local_scopes, bool skip_scale_loss,
     platform::NCCLContextMap *nccl_ctxs)
     : loss_var_name_(loss_var_name),
       places_(places),
@@ -44,7 +44,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
 MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
-    const std::unordered_set<std::string> &params,
+    const std::unordered_set<std::string> &params, bool skip_scale_loss,
     const std::vector<Scope *> &local_scopes)
     : loss_var_name_(loss_var_name),
       places_(places),
@@ -53,6 +53,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
   for (auto &p : params) {
     grad_names_.insert(GradVarName(p));
   }
+  skip_scale_loss_ = skip_scale_loss;
 }
 
 void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result,
@@ -95,7 +96,9 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
       // always use the first device
       CreateSendOp(&result, *op);
     } else if (IsScaleLossOp(*op)) {
-      CreateScaleLossGradOp(&result);
+      if (!skip_scale_loss_) {
+        CreateScaleLossGradOp(&result);
+      }
       is_forwarding = false;
     } else {
       CreateComputationalOps(&result, *op);
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h
index b5ba2dbd3c..f2428b01ca 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -34,11 +34,13 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
                           const std::string &loss_var_name,
                           const std::unordered_set<std::string> &params,
                           const std::vector<Scope *> &local_scopes,
+                          bool skip_scale_loss,
                           platform::NCCLContextMap *nccl_ctxs);
 #else
   MultiDevSSAGraphBuilder(const std::vector<platform::Place> &places,
                           const std::string &loss_var_name,
                           const std::unordered_set<std::string> &params,
+                          bool skip_scale_loss,
                           const std::vector<Scope *> &local_scopes);
 #endif
 
@@ -57,6 +59,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
 #ifdef PADDLE_WITH_CUDA
   platform::NCCLContextMap *nccl_ctxs_;
 #endif
+  bool skip_scale_loss_;
 
   bool IsScaleLossOp(const OpDesc &op) const;
 
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index 67e02e2f11..a673fa5288 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -57,7 +57,8 @@ ParallelExecutor::ParallelExecutor(
     const std::unordered_set<std::string> &params,
     const std::unordered_set<std::string> &bcast_vars,
     const ProgramDesc &main_program, const std::string &loss_var_name,
-    Scope *scope, const std::vector<Scope *> &local_scopes, bool allow_op_delay)
+    Scope *scope, const std::vector<Scope *> &local_scopes, bool allow_op_delay,
+    bool customize_scale_loss)
     : member_(new ParallelExecutorPrivate(places)) {
   member_->global_scope_ = scope;
 
@@ -90,12 +91,13 @@ ParallelExecutor::ParallelExecutor(
   // Step 2. Convert main_program to SSA form and dependency graph. Also, insert
   // ncclOp
 #ifdef PADDLE_WITH_CUDA
-  details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name,
-                                           params, member_->local_scopes_,
-                                           member_->nccl_ctxs_.get());
+  details::MultiDevSSAGraphBuilder builder(
+      member_->places_, loss_var_name, params, member_->local_scopes_,
+      customize_scale_loss, member_->nccl_ctxs_.get());
 #else
   details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name,
-                                           params, member_->local_scopes_);
+                                           params, member_->local_scopes_,
+                                           customize_scale_loss);
 #endif
 
   auto graph = builder.Build(main_program);
diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h
index f4f283bb4b..49da123d98 100644
--- a/paddle/fluid/framework/parallel_executor.h
+++ b/paddle/fluid/framework/parallel_executor.h
@@ -40,7 +40,7 @@ class ParallelExecutor {
                    const ProgramDesc& main_program,
                    const std::string& loss_var_name, Scope* scope,
                    const std::vector<Scope*>& local_scopes,
-                   bool allow_op_delay);
+                   bool allow_op_delay, bool customize_scale_loss);
 
   ~ParallelExecutor();
 
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 1f21e7abe7..b20b514fcd 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -502,11 +502,11 @@ All parameter, weight, gradient are variables in Paddle.
              const std::unordered_set<std::string> &bcast_vars,
              const ProgramDesc &main_program, const std::string &loss_var_name,
             Scope *scope, std::vector<Scope *> &local_scopes,
-             bool allow_op_delay) {
-            new (&self)
-                ParallelExecutor(num_threads, use_event, places, params,
-                                 bcast_vars, main_program, loss_var_name,
-                                 scope, local_scopes, allow_op_delay);
+             bool allow_op_delay, bool customize_loss_grad) {
+            new (&self) ParallelExecutor(num_threads, use_event, places,
+                                         params, bcast_vars, main_program,
+                                         loss_var_name, scope, local_scopes,
+                                         allow_op_delay, customize_loss_grad);
           })
       .def("bcast_params", &ParallelExecutor::BCastParamsToGPUs)
       // NOTE: even we return a vec<Scope*>* to Python use reference policy.
diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py
index fbdd6fd449..364a3eba74 100644
--- a/python/paddle/fluid/parallel_executor.py
+++ b/python/paddle/fluid/parallel_executor.py
@@ -29,7 +29,8 @@ class ParallelExecutor(object):
                  main_program=None,
                  num_threads=None,
                  allow_op_delay=False,
-                 share_vars_from=None):
+                 share_vars_from=None,
+                 customize_loss_grad=False):
         """
         ParallelExecutor can run program in parallel.
 
@@ -122,7 +123,8 @@ class ParallelExecutor(object):
             loss_name if loss_name else '',
             scope,
             local_scopes,
-            allow_op_delay)
+            allow_op_delay,
+            customize_loss_grad)
         self.scope = scope
 
     def run(self, fetch_list, feed=None, feed_dict=None):
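For context, a minimal Python-side usage sketch of the flag added in PATCH 3/4. This is not part of the patches: the toy network, the layer calls, and the use_cuda/loss_name arguments are illustrative assumptions about the fluid API of this era. With customize_loss_grad=True, MultiDevSSAGraphBuilder skips CreateScaleLossGradOp, so the executor no longer writes the implicit 1/device_count value into loss@GRAD and the program must supply the loss gradient itself.

    import paddle.fluid as fluid

    # Toy network; names and layer choices are illustrative only.
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    loss = fluid.layers.mean(fluid.layers.fc(input=image, size=10))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    # customize_loss_grad=True tells the multi-device graph builder to
    # skip CreateScaleLossGradOp, which would otherwise initialize
    # loss@GRAD to 1/device_count; the program is then responsible for
    # producing loss@GRAD (e.g. a hand-written backward pass).
    pe = fluid.ParallelExecutor(use_cuda=True,
                                loss_name=loss.name,
                                customize_loss_grad=True)

Note how the flag is threaded through under two names: the Python-facing customize_loss_grad describes the user intent, while the builder member skip_scale_loss_ describes the mechanism.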
From 55feba9b5ad624a97e81e9e69fb8ef09df0c084b Mon Sep 17 00:00:00 2001
From: Yu Yang
Date: Mon, 23 Apr 2018 17:11:04 +0800
Subject: [PATCH 4/4] Fix CPU compile

---
 paddle/fluid/framework/details/multi_devices_graph_builder.cc | 4 ++--
 paddle/fluid/framework/details/multi_devices_graph_builder.h  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index f27f184310..10d39e7793 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -44,8 +44,8 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
 MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
     const std::vector<platform::Place> &places,
     const std::string &loss_var_name,
-    const std::unordered_set<std::string> &params, bool skip_scale_loss,
-    const std::vector<Scope *> &local_scopes)
+    const std::unordered_set<std::string> &params,
+    const std::vector<Scope *> &local_scopes, bool skip_scale_loss)
     : loss_var_name_(loss_var_name),
       places_(places),
       local_scopes_(local_scopes) {
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h
index f2428b01ca..009c31b40c 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -40,8 +40,8 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
   MultiDevSSAGraphBuilder(const std::vector<platform::Place> &places,
                           const std::string &loss_var_name,
                           const std::unordered_set<std::string> &params,
-                          bool skip_scale_loss,
-                          const std::vector<Scope *> &local_scopes);
+                          const std::vector<Scope *> &local_scopes,
+                          bool skip_scale_loss);
 #endif
 
   std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
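Continuing the sketch above (again illustrative, not part of the patches): the run() signature visible in the PATCH 3/4 hunk, def run(self, fetch_list, feed=None, feed_dict=None), suggests a training step with the executor looks roughly like the following, assuming dict-style feed is supported at this point in the API:

    import numpy as np

    # One step: fetch the mean loss for a random mini-batch whose shape
    # matches the toy network above (batch size 32 is arbitrary).
    batch = {'image': np.random.rand(32, 784).astype('float32')}
    fetched = pe.run(fetch_list=[loss.name], feed=batch)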