From 681514e15ffbba78def454402f24d5a56f66546c Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 10 Sep 2018 12:20:08 +0800 Subject: [PATCH 01/17] Make all scope pointer to shared --- .../fast_threaded_ssa_graph_executor.cc | 3 +- .../fast_threaded_ssa_graph_executor.h | 11 ++++--- .../framework/details/fetch_op_handle.cc | 2 +- .../fluid/framework/details/fetch_op_handle.h | 4 +-- .../scope_buffered_ssa_graph_executor.cc | 3 +- .../scope_buffered_ssa_graph_executor.h | 5 +-- .../details/threaded_ssa_graph_executor.cc | 3 +- .../details/threaded_ssa_graph_executor.h | 11 ++++--- paddle/fluid/framework/parallel_executor.cc | 31 ++++++++++++------- paddle/fluid/framework/parallel_executor.h | 21 +++++++------ paddle/fluid/framework/scope.cc | 11 ++++--- paddle/fluid/framework/scope.h | 2 +- 12 files changed, 63 insertions(+), 44 deletions(-) diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index 7606f2bc06..a9b89614ae 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -22,7 +22,8 @@ namespace framework { namespace details { FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, const std::vector &local_scopes, + const ExecutionStrategy &strategy, + const std::vector> &local_scopes, const std::vector &places, std::unique_ptr &&graph) : strategy_(strategy), diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h index dad3a231cb..fb615d70b7 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h @@ -29,16 +29,17 @@ namespace details { class OpHandleBase; class FastThreadedSSAGraphExecutor : public SSAGraphExecutor { public: - FastThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, - const std::vector &local_scopes, - const std::vector &places, - std::unique_ptr &&graph); + FastThreadedSSAGraphExecutor( + const ExecutionStrategy &strategy, + const std::vector> &local_scopes, + const std::vector &places, + std::unique_ptr &&graph); FeedFetchList Run(const std::vector &fetch_tensors) override; const ir::Graph &Graph() const override; private: ExecutionStrategy strategy_; - std::vector local_scopes_; + std::vector> local_scopes_; std::vector places_; std::unique_ptr graph_; diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index fe18b2060c..2f4aefd39d 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -22,7 +22,7 @@ namespace framework { namespace details { FetchOpHandle::FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset, - std::vector *local_scopes) + std::vector> *local_scopes) : OpHandleBase(node), data_(data), offset_(offset), diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h index 6ce42f92d7..a207e36b8a 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.h +++ b/paddle/fluid/framework/details/fetch_op_handle.h @@ -29,7 +29,7 @@ namespace details { struct FetchOpHandle : public OpHandleBase { public: FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset, - std::vector *local_scopes); + std::vector> *local_scopes); ~FetchOpHandle(); @@ -47,7 +47,7 @@ struct FetchOpHandle : public OpHandleBase { private: FeedFetchList *data_; size_t offset_; - std::vector *local_scopes_; + std::vector> *local_scopes_; std::vector tensors_; }; diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index 5bd974d6b7..bf5671c679 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -23,7 +23,8 @@ namespace paddle { namespace framework { namespace details { ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor( - ExecutionStrategy strategy, std::vector local_scopes, + ExecutionStrategy strategy, + std::vector> local_scopes, std::vector var_infos, std::vector places, std::unique_ptr &&underlying_executor) : strategy_(std::move(strategy)), diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h index 5e87e0bf50..ec31755af5 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h @@ -37,7 +37,8 @@ struct VariableInfo { class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { public: ScopeBufferedSSAGraphExecutor( - ExecutionStrategy strategy, std::vector local_scopes, + ExecutionStrategy strategy, + std::vector> local_scopes, std::vector var_infos, std::vector places, std::unique_ptr&& underlying_executor); @@ -52,7 +53,7 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { ExecutionStrategy strategy_; std::unique_ptr underlying_executor_; - std::vector local_scopes_; + std::vector> local_scopes_; std::vector var_infos_; std::vector places_; }; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index c9e331ef35..cc6f444363 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -21,7 +21,8 @@ namespace paddle { namespace framework { namespace details { ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, const std::vector &local_scopes, + const ExecutionStrategy &strategy, + const std::vector> &local_scopes, const std::vector &places, std::unique_ptr &&graph) : graph_(std::move(graph)), diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 9135c1f5d4..2a74af6c3d 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -38,10 +38,11 @@ namespace details { class ThreadedSSAGraphExecutor : public SSAGraphExecutor { public: - ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, - const std::vector &local_scopes, - const std::vector &places, - std::unique_ptr &&graph); + ThreadedSSAGraphExecutor( + const ExecutionStrategy &strategy, + const std::vector> &local_scopes, + const std::vector &places, + std::unique_ptr &&graph); const ir::Graph &Graph() const override { return *graph_; } // Run a SSAGraph by a thread pool @@ -57,7 +58,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { private: std::unique_ptr graph_; std::unique_ptr<::ThreadPool> pool_; - std::vector local_scopes_; + std::vector> local_scopes_; std::vector places_; platform::DeviceContextPool fetch_ctxs_; ExceptionHolder exception_holder_; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 81cb24bdda..93c74deb3e 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -39,7 +39,8 @@ std::unique_ptr ApplyParallelExecutorPass( const ProgramDesc &main_program, const std::vector &places, const std::string &loss_var_name, const std::unordered_set ¶m_names, - const std::vector &local_scopes, const bool use_cuda, + const std::vector> &local_scopes, + const bool use_cuda, #ifdef PADDLE_WITH_CUDA const BuildStrategy &strategy, platform::NCCLContextMap *nccl_ctxs) { #else @@ -66,8 +67,8 @@ std::unique_ptr ApplyParallelExecutorPass( &loss_var_name); multi_devices_pass->SetNotOwned>( "params", ¶m_names); - multi_devices_pass->SetNotOwned>("local_scopes", - &local_scopes); + multi_devices_pass->SetNotOwned>>( + "local_scopes", &local_scopes); multi_devices_pass->SetNotOwned("strategy", &strategy); #ifdef PADDLE_WITH_CUDA @@ -100,8 +101,8 @@ class ParallelExecutorPrivate { : places_(places) {} std::vector places_; - std::vector local_scopes_; - Scope *global_scope_; + std::vector> local_scopes_; + std::shared_ptr global_scope_; std::unique_ptr executor_; #ifdef PADDLE_WITH_CUDA @@ -112,7 +113,7 @@ class ParallelExecutorPrivate { bool use_all_reduce_; }; -std::vector &ParallelExecutor::GetLocalScopes() { +std::vector> &ParallelExecutor::GetLocalScopes() { return member_->local_scopes_; } @@ -121,7 +122,8 @@ ParallelExecutor::ParallelExecutor( const std::unordered_set ¶ms, const std::unordered_set &bcast_vars, const ProgramDesc &main_program, const std::string &loss_var_name, - Scope *scope, const std::vector &local_scopes, + const std::shared_ptr &scope, + const std::vector> &local_scopes, const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy, size_t num_trainers, size_t trainer_id) : member_(new ParallelExecutorPrivate(places)) { @@ -142,13 +144,13 @@ ParallelExecutor::ParallelExecutor( member_->own_local_scope_ = true; member_->local_scopes_.emplace_back(member_->global_scope_); for (size_t i = 1; i < member_->places_.size(); ++i) { - member_->local_scopes_.emplace_back(&scope->NewScope()); + member_->local_scopes_.emplace_back(scope->NewSharedScope()); } } else { member_->own_local_scope_ = false; PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size()); for (size_t i = 0; i < member_->places_.size(); ++i) { - member_->local_scopes_.emplace_back(&local_scopes[i]->NewScope()); + member_->local_scopes_.emplace_back(local_scopes[i]->NewSharedScope()); } } @@ -321,7 +323,7 @@ void ParallelExecutor::FeedTensorsIntoLocalScopes( for (size_t i = 0; i < tensors.size(); ++i) { auto &map = tensors[i]; - auto *scope = member_->local_scopes_[i]; + auto &scope = member_->local_scopes_[i]; for (auto &pair : map) { auto *trg = scope->Var(pair.first)->GetMutable(); trg->ShareDataWith(pair.second); @@ -351,8 +353,15 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( ParallelExecutor::~ParallelExecutor() { if (member_->own_local_scope_) { + std::vector local_scopes_ptrs; + local_scopes_ptrs.reserve(member_->local_scopes_.size()); for (size_t i = 1; i < member_->local_scopes_.size(); ++i) { - member_->global_scope_->DeleteScope(member_->local_scopes_[i]); + local_scopes_ptrs.emplace_back(member_->local_scopes_[i].get()); + member_->local_scopes_[i].reset(); + } + + for (size_t i = 0; i != local_scopes_ptrs.size(); ++i) { + member_->global_scope_->DeleteScope(local_scopes_ptrs[i]); } } } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 5fb748fa20..ce1076e44b 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -39,19 +39,20 @@ class ParallelExecutor { DISABLE_COPY_AND_ASSIGN(ParallelExecutor); public: - explicit ParallelExecutor(const std::vector &places, - const std::unordered_set ¶ms, - const std::unordered_set &bcast_vars, - const ProgramDesc &main_program, - const std::string &loss_var_name, Scope *scope, - const std::vector &local_scopes, - const ExecutionStrategy &exec_strategy, - const BuildStrategy &build_strategy, - size_t num_trainers = 1, size_t trainer_id = 0); + explicit ParallelExecutor( + const std::vector &places, + const std::unordered_set ¶ms, + const std::unordered_set &bcast_vars, + const ProgramDesc &main_program, const std::string &loss_var_name, + const std::shared_ptr &scope, + const std::vector> &local_scopes, + const ExecutionStrategy &exec_strategy, + const BuildStrategy &build_strategy, size_t num_trainers = 1, + size_t trainer_id = 0); ~ParallelExecutor(); - std::vector &GetLocalScopes(); + std::vector> &GetLocalScopes(); /** * Feed tensors to local scopes. The size of tensors should be equal to the diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 50f374e370..fa6bf4429d 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -38,8 +38,8 @@ Scope::~Scope() { DropKids(); } Scope& Scope::NewScope() const { std::unique_lock lock(mutex_); - kids_.push_back(new Scope(this)); - return *kids_.back(); + kids_.push_back(std::shared_ptr(new Scope(this))); + return kids_.back().get(); } Variable* Scope::Var(const std::string& name) { @@ -68,7 +68,6 @@ const Scope* Scope::FindScope(const Variable* var) const { void Scope::DropKids() { std::unique_lock lock(mutex_); - for (Scope* s : kids_) delete s; kids_.clear(); } @@ -84,8 +83,12 @@ std::vector Scope::LocalVarNames() const { void Scope::DeleteScope(Scope* scope) const { std::unique_lock lock(mutex_); - auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); + auto it = std::find_if(this->kids_.begin(), this->kids_.end(), + [&scope](const std::shared_ptr& kid) { + return kid.get() == scope; + }); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); + it->reset(); this->kids_.erase(it); // When making memory benchmark on Fluid, we have to delete scope sync. if (FLAGS_benchmark || FLAGS_eager_delete_scope) { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index e246241c0a..0ba5d34798 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -105,7 +105,7 @@ class Scope { Variable* FindVarLocally(const std::string& name) const; // Scope in `kids_` are owned by this class. - mutable std::list kids_; + mutable std::list> kids_; Scope const* parent_{nullptr}; DISABLE_COPY_AND_ASSIGN(Scope); From dc863aac7edeccbe8362d625b2c1e6eeca885000 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 10 Sep 2018 14:29:19 +0800 Subject: [PATCH 02/17] Add kids exists detection in Scope --- .../fast_threaded_ssa_graph_executor.cc | 3 +- .../fast_threaded_ssa_graph_executor.h | 11 +++--- .../framework/details/fetch_op_handle.cc | 2 +- .../fluid/framework/details/fetch_op_handle.h | 4 +-- .../scope_buffered_ssa_graph_executor.cc | 3 +- .../scope_buffered_ssa_graph_executor.h | 5 ++- .../details/threaded_ssa_graph_executor.cc | 3 +- .../details/threaded_ssa_graph_executor.h | 11 +++--- paddle/fluid/framework/parallel_executor.cc | 34 ++++++++----------- paddle/fluid/framework/parallel_executor.h | 21 ++++++------ paddle/fluid/framework/scope.cc | 17 ++++++---- paddle/fluid/framework/scope.h | 5 ++- .../test_image_classification_resnet.py | 5 +-- .../test_image_classification_vgg.py | 5 +-- .../test_recognize_digits_conv.py | 5 +-- .../test_recognize_digits_mlp.py | 5 +-- 16 files changed, 60 insertions(+), 79 deletions(-) diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index a9b89614ae..7606f2bc06 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -22,8 +22,7 @@ namespace framework { namespace details { FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, + const ExecutionStrategy &strategy, const std::vector &local_scopes, const std::vector &places, std::unique_ptr &&graph) : strategy_(strategy), diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h index fb615d70b7..dad3a231cb 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h @@ -29,17 +29,16 @@ namespace details { class OpHandleBase; class FastThreadedSSAGraphExecutor : public SSAGraphExecutor { public: - FastThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, - const std::vector &places, - std::unique_ptr &&graph); + FastThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, + const std::vector &local_scopes, + const std::vector &places, + std::unique_ptr &&graph); FeedFetchList Run(const std::vector &fetch_tensors) override; const ir::Graph &Graph() const override; private: ExecutionStrategy strategy_; - std::vector> local_scopes_; + std::vector local_scopes_; std::vector places_; std::unique_ptr graph_; diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 2f4aefd39d..fe18b2060c 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -22,7 +22,7 @@ namespace framework { namespace details { FetchOpHandle::FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset, - std::vector> *local_scopes) + std::vector *local_scopes) : OpHandleBase(node), data_(data), offset_(offset), diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h index a207e36b8a..6ce42f92d7 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.h +++ b/paddle/fluid/framework/details/fetch_op_handle.h @@ -29,7 +29,7 @@ namespace details { struct FetchOpHandle : public OpHandleBase { public: FetchOpHandle(ir::Node *node, FeedFetchList *data, size_t offset, - std::vector> *local_scopes); + std::vector *local_scopes); ~FetchOpHandle(); @@ -47,7 +47,7 @@ struct FetchOpHandle : public OpHandleBase { private: FeedFetchList *data_; size_t offset_; - std::vector> *local_scopes_; + std::vector *local_scopes_; std::vector tensors_; }; diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index bf5671c679..5bd974d6b7 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -23,8 +23,7 @@ namespace paddle { namespace framework { namespace details { ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor( - ExecutionStrategy strategy, - std::vector> local_scopes, + ExecutionStrategy strategy, std::vector local_scopes, std::vector var_infos, std::vector places, std::unique_ptr &&underlying_executor) : strategy_(std::move(strategy)), diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h index ec31755af5..5e87e0bf50 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h @@ -37,8 +37,7 @@ struct VariableInfo { class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { public: ScopeBufferedSSAGraphExecutor( - ExecutionStrategy strategy, - std::vector> local_scopes, + ExecutionStrategy strategy, std::vector local_scopes, std::vector var_infos, std::vector places, std::unique_ptr&& underlying_executor); @@ -53,7 +52,7 @@ class ScopeBufferedSSAGraphExecutor : public SSAGraphExecutor { ExecutionStrategy strategy_; std::unique_ptr underlying_executor_; - std::vector> local_scopes_; + std::vector local_scopes_; std::vector var_infos_; std::vector places_; }; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index cc6f444363..c9e331ef35 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -21,8 +21,7 @@ namespace paddle { namespace framework { namespace details { ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, + const ExecutionStrategy &strategy, const std::vector &local_scopes, const std::vector &places, std::unique_ptr &&graph) : graph_(std::move(graph)), diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 2a74af6c3d..9135c1f5d4 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -38,11 +38,10 @@ namespace details { class ThreadedSSAGraphExecutor : public SSAGraphExecutor { public: - ThreadedSSAGraphExecutor( - const ExecutionStrategy &strategy, - const std::vector> &local_scopes, - const std::vector &places, - std::unique_ptr &&graph); + ThreadedSSAGraphExecutor(const ExecutionStrategy &strategy, + const std::vector &local_scopes, + const std::vector &places, + std::unique_ptr &&graph); const ir::Graph &Graph() const override { return *graph_; } // Run a SSAGraph by a thread pool @@ -58,7 +57,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { private: std::unique_ptr graph_; std::unique_ptr<::ThreadPool> pool_; - std::vector> local_scopes_; + std::vector local_scopes_; std::vector places_; platform::DeviceContextPool fetch_ctxs_; ExceptionHolder exception_holder_; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 93c74deb3e..5b8c75a93d 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -39,8 +39,7 @@ std::unique_ptr ApplyParallelExecutorPass( const ProgramDesc &main_program, const std::vector &places, const std::string &loss_var_name, const std::unordered_set ¶m_names, - const std::vector> &local_scopes, - const bool use_cuda, + const std::vector &local_scopes, const bool use_cuda, #ifdef PADDLE_WITH_CUDA const BuildStrategy &strategy, platform::NCCLContextMap *nccl_ctxs) { #else @@ -67,8 +66,8 @@ std::unique_ptr ApplyParallelExecutorPass( &loss_var_name); multi_devices_pass->SetNotOwned>( "params", ¶m_names); - multi_devices_pass->SetNotOwned>>( - "local_scopes", &local_scopes); + multi_devices_pass->SetNotOwned>("local_scopes", + &local_scopes); multi_devices_pass->SetNotOwned("strategy", &strategy); #ifdef PADDLE_WITH_CUDA @@ -101,8 +100,8 @@ class ParallelExecutorPrivate { : places_(places) {} std::vector places_; - std::vector> local_scopes_; - std::shared_ptr global_scope_; + std::vector local_scopes_; + Scope *global_scope_; std::unique_ptr executor_; #ifdef PADDLE_WITH_CUDA @@ -113,7 +112,7 @@ class ParallelExecutorPrivate { bool use_all_reduce_; }; -std::vector> &ParallelExecutor::GetLocalScopes() { +std::vector &ParallelExecutor::GetLocalScopes() { return member_->local_scopes_; } @@ -122,8 +121,7 @@ ParallelExecutor::ParallelExecutor( const std::unordered_set ¶ms, const std::unordered_set &bcast_vars, const ProgramDesc &main_program, const std::string &loss_var_name, - const std::shared_ptr &scope, - const std::vector> &local_scopes, + Scope *scope, const std::vector &local_scopes, const ExecutionStrategy &exec_strategy, const BuildStrategy &build_strategy, size_t num_trainers, size_t trainer_id) : member_(new ParallelExecutorPrivate(places)) { @@ -144,13 +142,13 @@ ParallelExecutor::ParallelExecutor( member_->own_local_scope_ = true; member_->local_scopes_.emplace_back(member_->global_scope_); for (size_t i = 1; i < member_->places_.size(); ++i) { - member_->local_scopes_.emplace_back(scope->NewSharedScope()); + member_->local_scopes_.emplace_back(&scope->NewScope()); } } else { member_->own_local_scope_ = false; PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size()); for (size_t i = 0; i < member_->places_.size(); ++i) { - member_->local_scopes_.emplace_back(local_scopes[i]->NewSharedScope()); + member_->local_scopes_.emplace_back(&local_scopes[i]->NewScope()); } } @@ -323,7 +321,7 @@ void ParallelExecutor::FeedTensorsIntoLocalScopes( for (size_t i = 0; i < tensors.size(); ++i) { auto &map = tensors[i]; - auto &scope = member_->local_scopes_[i]; + auto *scope = member_->local_scopes_[i]; for (auto &pair : map) { auto *trg = scope->Var(pair.first)->GetMutable(); trg->ShareDataWith(pair.second); @@ -353,15 +351,11 @@ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes( ParallelExecutor::~ParallelExecutor() { if (member_->own_local_scope_) { - std::vector local_scopes_ptrs; - local_scopes_ptrs.reserve(member_->local_scopes_.size()); for (size_t i = 1; i < member_->local_scopes_.size(); ++i) { - local_scopes_ptrs.emplace_back(member_->local_scopes_[i].get()); - member_->local_scopes_[i].reset(); - } - - for (size_t i = 0; i != local_scopes_ptrs.size(); ++i) { - member_->global_scope_->DeleteScope(local_scopes_ptrs[i]); + Scope *local_scope = member_->local_scopes_[i]; + if (member_->global_scope_->HasKid(local_scope)) { + member_->global_scope_->DeleteScope(local_scope); + } } } } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index ce1076e44b..5fb748fa20 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -39,20 +39,19 @@ class ParallelExecutor { DISABLE_COPY_AND_ASSIGN(ParallelExecutor); public: - explicit ParallelExecutor( - const std::vector &places, - const std::unordered_set ¶ms, - const std::unordered_set &bcast_vars, - const ProgramDesc &main_program, const std::string &loss_var_name, - const std::shared_ptr &scope, - const std::vector> &local_scopes, - const ExecutionStrategy &exec_strategy, - const BuildStrategy &build_strategy, size_t num_trainers = 1, - size_t trainer_id = 0); + explicit ParallelExecutor(const std::vector &places, + const std::unordered_set ¶ms, + const std::unordered_set &bcast_vars, + const ProgramDesc &main_program, + const std::string &loss_var_name, Scope *scope, + const std::vector &local_scopes, + const ExecutionStrategy &exec_strategy, + const BuildStrategy &build_strategy, + size_t num_trainers = 1, size_t trainer_id = 0); ~ParallelExecutor(); - std::vector> &GetLocalScopes(); + std::vector &GetLocalScopes(); /** * Feed tensors to local scopes. The size of tensors should be equal to the diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index fa6bf4429d..2be655b89a 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -38,8 +38,8 @@ Scope::~Scope() { DropKids(); } Scope& Scope::NewScope() const { std::unique_lock lock(mutex_); - kids_.push_back(std::shared_ptr(new Scope(this))); - return kids_.back().get(); + kids_.push_back(new Scope(this)); + return *kids_.back(); } Variable* Scope::Var(const std::string& name) { @@ -68,9 +68,16 @@ const Scope* Scope::FindScope(const Variable* var) const { void Scope::DropKids() { std::unique_lock lock(mutex_); + for (Scope* s : kids_) delete s; kids_.clear(); } +bool Scope::HasKid(const Scope* scope) const { + std::unique_lock lock(mutex_); + auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); + return it != this->kids_.end(); +} + std::vector Scope::LocalVarNames() const { std::unique_lock lock(mutex_); std::vector known_vars; @@ -83,12 +90,8 @@ std::vector Scope::LocalVarNames() const { void Scope::DeleteScope(Scope* scope) const { std::unique_lock lock(mutex_); - auto it = std::find_if(this->kids_.begin(), this->kids_.end(), - [&scope](const std::shared_ptr& kid) { - return kid.get() == scope; - }); + auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); - it->reset(); this->kids_.erase(it); // When making memory benchmark on Fluid, we have to delete scope sync. if (FLAGS_benchmark || FLAGS_eager_delete_scope) { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index 0ba5d34798..b6165a595d 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -71,6 +71,9 @@ class Scope { /// Drop all kids scopes belonged to this scope. void DropKids(); + /// Find if a scope exists in the kid scopes + bool HasKid(const Scope* scope) const; + // enumerate all the variables current contains. std::vector LocalVarNames() const; @@ -105,7 +108,7 @@ class Scope { Variable* FindVarLocally(const std::string& name) const; // Scope in `kids_` are owned by this class. - mutable std::list> kids_; + mutable std::list kids_; Scope const* parent_{nullptr}; DISABLE_COPY_AND_ASSIGN(Scope); diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index e5ae95e2d9..de276755bb 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -178,7 +178,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index ff91be72c9..dd547f3448 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -152,7 +152,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index fa72c939e5..973308498b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -155,7 +155,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index 440d2a3083..cb4aeb430e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -137,7 +137,4 @@ if __name__ == '__main__': for parallel in (False, True): if use_cuda and not core.is_compiled_with_cuda(): continue - # TODO(minqiyang): remove this line after fixing the deletion - # order problem of Scope in ParallelExecutor in manylinux - if six.PY2: - main(use_cuda=use_cuda, parallel=parallel) + main(use_cuda=use_cuda, parallel=parallel) From e0436ad8bbaed57b9c2c60f100d1e1f86fe42e07 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 11 Sep 2018 16:07:07 +0800 Subject: [PATCH 03/17] refine fusion lstm infershape --- paddle/fluid/framework/operator.cc | 277 ++++++++----------- paddle/fluid/framework/shape_runtime_infer.h | 86 ++++++ paddle/fluid/operators/fusion_lstm_op.cc | 81 ++++-- 3 files changed, 260 insertions(+), 184 deletions(-) create mode 100644 paddle/fluid/framework/shape_runtime_infer.h diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index d58d6e4f3e..36025db7ba 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/shape_inference.h" +#include "paddle/fluid/framework/shape_runtime_infer.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/platform/profiler.h" @@ -458,187 +459,147 @@ bool OpSupportGPU(const std::string& op_type) { return false; } -class RuntimeInferShapeContext : public InferShapeContext { - public: - RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope) - : op_(op), scope_(scope) {} - - bool HasInput(const std::string& name) const override { - if (!op_.HasInputs(name)) { - return false; - } - auto& ins = Inputs(name); - size_t length = ins.size(); - if (length == 0) { - return false; - } - PADDLE_ENFORCE_EQ(length, 1UL, - "Input %s should not have more than one inputs", name); - auto ipt = ins[0]; - auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt); - return var != nullptr; +bool RuntimeInferShapeContext::HasInput(const std::string& name) const { + if (!op_.HasInputs(name)) { + return false; } - - bool HasOutput(const std::string& name) const override { - if (!op_.HasOutputs(name)) { - return false; - } - auto& outs = Outputs(name); - size_t length = outs.size(); - if (length == 0) { - return false; - } - PADDLE_ENFORCE_EQ(length, 1UL, - "Output %s should not have more than one inputs", name); - auto ipt = outs[0]; - auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt); - return var != nullptr; + auto& ins = Inputs(name); + size_t length = ins.size(); + if (length == 0) { + return false; } + PADDLE_ENFORCE_EQ(length, 1UL, + "Input %s should not have more than one inputs", name); + auto ipt = ins[0]; + auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt); + return var != nullptr; +} - bool HasInputs(const std::string& name) const override { - if (!op_.HasInputs(name)) { - return false; - } - auto inputs = op_.Inputs(name); - if (inputs.empty()) { - return false; - } - for (auto& input : inputs) { - if (scope_.FindVar(input) == nullptr) { - return false; - } - } - return true; +bool RuntimeInferShapeContext::HasOutput(const std::string& name) const { + if (!op_.HasOutputs(name)) { + return false; } + auto& outs = Outputs(name); + size_t length = outs.size(); + if (length == 0) { + return false; + } + PADDLE_ENFORCE_EQ(length, 1UL, + "Output %s should not have more than one inputs", name); + auto ipt = outs[0]; + auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt); + return var != nullptr; +} - bool HasOutputs(const std::string& name) const override { - if (!op_.HasOutputs(name)) { - return false; - } - auto outputs = op_.Outputs(name); - if (outputs.empty()) { +bool RuntimeInferShapeContext::HasInputs(const std::string& name) const { + if (!op_.HasInputs(name)) { + return false; + } + auto inputs = op_.Inputs(name); + if (inputs.empty()) { + return false; + } + for (auto& input : inputs) { + if (scope_.FindVar(input) == nullptr) { return false; } - for (auto& output : outputs) { - if (scope_.FindVar(output) == nullptr) { - return false; - } - } - return true; } + return true; +} - AttrReader Attrs() const override { return AttrReader(op_.Attrs()); } - - const std::vector& Inputs( - const std::string& name) const override { - return op_.Inputs(name); +bool RuntimeInferShapeContext::HasOutputs(const std::string& name) const { + if (!op_.HasOutputs(name)) { + return false; } - - const std::vector& Outputs( - const std::string& name) const override { - return op_.Outputs(name); + auto outputs = op_.Outputs(name); + if (outputs.empty()) { + return false; } + for (auto& output : outputs) { + if (scope_.FindVar(output) == nullptr) { + return false; + } + } + return true; +} - void ShareLoD(const std::string& in, const std::string& out, size_t i = 0, - size_t j = 0) const override { - PADDLE_ENFORCE_LT(i, Inputs(in).size()); - PADDLE_ENFORCE_LT(j, Outputs(out).size()); - Variable* in_var = scope_.FindVar(Inputs(in)[i]); - Variable* out_var = scope_.FindVar(Outputs(out)[j]); - if (!in_var->IsType()) return; - PADDLE_ENFORCE(out_var->IsType(), - "The %d-th output of Output(%s) must be LoDTensor.", j, out); - auto in_tensor = in_var->Get(); - auto* out_tensor = out_var->GetMutable(); - out_tensor->set_lod(in_tensor.lod()); +void RuntimeInferShapeContext::ShareLoD(const std::string& in, + const std::string& out, size_t i, + size_t j) const { + PADDLE_ENFORCE_LT(i, Inputs(in).size()); + PADDLE_ENFORCE_LT(j, Outputs(out).size()); + Variable* in_var = scope_.FindVar(Inputs(in)[i]); + Variable* out_var = scope_.FindVar(Outputs(out)[j]); + if (!in_var->IsType()) return; + PADDLE_ENFORCE(out_var->IsType(), + "The %d-th output of Output(%s) must be LoDTensor.", j, out); + auto in_tensor = in_var->Get(); + auto* out_tensor = out_var->GetMutable(); + out_tensor->set_lod(in_tensor.lod()); // TODO(dzhwinter) : reuse ShareLoD in most operators. // Need to call ShareLayout explicitly in sequence related ops. // Shall we have a better method to shared info between in/out Tensor? #ifdef PADDLE_WITH_MKLDNN - // Fix me: ugly workaround below - // Correct solution: - // set_layout() should NOT be called here (i.e. ShareLoD). Instead, - // layout of output tensor should be set "manually" in Compute() - // of each OPKernel. The reason layout should NOT be shared between - // input and output "automatically" (now by InferShape()->ShareLoD()) - // is that layout transform may occur after InferShape(). - // Workaround: - // Skip set_layout() when input layout is kMKLDNN - // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN - // OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called - // in Compute() - if (in_tensor.layout() != DataLayout::kMKLDNN) + // Fix me: ugly workaround below + // Correct solution: + // set_layout() should NOT be called here (i.e. ShareLoD). Instead, + // layout of output tensor should be set "manually" in Compute() + // of each OPKernel. The reason layout should NOT be shared between + // input and output "automatically" (now by InferShape()->ShareLoD()) + // is that layout transform may occur after InferShape(). + // Workaround: + // Skip set_layout() when input layout is kMKLDNN + // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN + // OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called + // in Compute() + if (in_tensor.layout() != DataLayout::kMKLDNN) #endif - out_tensor->set_layout(in_tensor.layout()); - } - - void ShareLayout(const std::string& in, const std::string& out, size_t i = 0, - size_t j = 0) const { - PADDLE_ENFORCE_LT(i, Inputs(in).size()); - PADDLE_ENFORCE_LT(j, Outputs(out).size()); - Variable* in_var = scope_.FindVar(Inputs(in)[i]); - Variable* out_var = scope_.FindVar(Outputs(out)[j]); - if (!in_var->IsType()) return; - PADDLE_ENFORCE(out_var->IsType(), - "The %d-th output of Output(%s) must be LoDTensor.", j, out); - auto in_tensor = in_var->Get(); - auto* out_tensor = out_var->GetMutable(); out_tensor->set_layout(in_tensor.layout()); - } - - bool IsRuntime() const override { return true; } - - protected: - DDim GetDim(const std::string& name) const override { - Variable* var = scope_.FindVar(name); - PADDLE_ENFORCE_NOT_NULL(var); - if (var->IsType()) { - return var->Get().dims(); - } else if (var->IsType()) { - return var->Get().GetCompleteDims(); - } else { - PADDLE_THROW( - "Only LoDTensor/SelectedRows support 'GetDim', but Variable %s's " - "type_id is %s.", - name, var->Type().name()); - } - } - - std::vector GetRepeatedDims(const std::string& name) const override { - PADDLE_THROW("Only compile time support this method"); - } - - void SetDim(const std::string& name, const DDim& dim) override { - Variable* var = scope_.FindVar(name); - if (var->IsType()) { - var->GetMutable()->Resize(dim); - } else if (var->IsType()) { - var->GetMutable()->set_height(dim[0]); - } else { - PADDLE_THROW("Variable %s type_id %s, expect LoDTensor/SelectedRows.", - name, var->Type().name()); - } - } - - void SetRepeatedDims(const std::string& name, - const std::vector& dims) override { - PADDLE_THROW("Only compile time support this method"); - } +} - proto::VarType::Type GetVarType(const std::string& name) const override { - auto* var = scope_.FindVar(name); - return ToVarType(var->Type()); +void RuntimeInferShapeContext::ShareLayout(const std::string& in, + const std::string& out, size_t i, + size_t j) const { + PADDLE_ENFORCE_LT(i, Inputs(in).size()); + PADDLE_ENFORCE_LT(j, Outputs(out).size()); + Variable* in_var = scope_.FindVar(Inputs(in)[i]); + Variable* out_var = scope_.FindVar(Outputs(out)[j]); + if (!in_var->IsType()) return; + PADDLE_ENFORCE(out_var->IsType(), + "The %d-th output of Output(%s) must be LoDTensor.", j, out); + auto in_tensor = in_var->Get(); + auto* out_tensor = out_var->GetMutable(); + out_tensor->set_layout(in_tensor.layout()); +} + +DDim RuntimeInferShapeContext::GetDim(const std::string& name) const { + Variable* var = scope_.FindVar(name); + PADDLE_ENFORCE_NOT_NULL(var); + if (var->IsType()) { + return var->Get().dims(); + } else if (var->IsType()) { + return var->Get().GetCompleteDims(); + } else { + PADDLE_THROW( + "Only LoDTensor/SelectedRows support 'GetDim', but Variable %s's " + "type_id is %s.", + name, var->Type().name()); } +} - InferShapeVarPtr GetVarPtr(const std::string& name) override { - return scope_.FindVar(name); +void RuntimeInferShapeContext::SetDim(const std::string& name, + const DDim& dim) { + Variable* var = scope_.FindVar(name); + if (var->IsType()) { + var->GetMutable()->Resize(dim); + } else if (var->IsType()) { + var->GetMutable()->set_height(dim[0]); + } else { + PADDLE_THROW("Variable %s type_id %s, expect LoDTensor/SelectedRows.", name, + var->Type().name()); } - - private: - const OperatorBase& op_; - const Scope& scope_; -}; +} static void CheckTensorNANOrInf(const std::string& name, const framework::Tensor& tensor) { diff --git a/paddle/fluid/framework/shape_runtime_infer.h b/paddle/fluid/framework/shape_runtime_infer.h new file mode 100644 index 0000000000..04d4e33f7a --- /dev/null +++ b/paddle/fluid/framework/shape_runtime_infer.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/shape_inference.h" +#include "paddle/fluid/framework/var_type.h" + +namespace paddle { +namespace framework { + +class RuntimeInferShapeContext : public InferShapeContext { + public: + RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope) + : op_(op), scope_(scope) {} + + bool HasInput(const std::string& name) const override; + bool HasOutput(const std::string& name) const override; + bool HasInputs(const std::string& name) const override; + bool HasOutputs(const std::string& name) const override; + + const OperatorBase& OpBase() const { return op_; } + + const Scope& InferScope() const { return scope_; } + AttrReader Attrs() const override { return AttrReader(op_.Attrs()); } + + const std::vector& Inputs( + const std::string& name) const override { + return op_.Inputs(name); + } + + const std::vector& Outputs( + const std::string& name) const override { + return op_.Outputs(name); + } + + void ShareLoD(const std::string& in, const std::string& out, size_t i = 0, + size_t j = 0) const override; + + void ShareLayout(const std::string& in, const std::string& out, size_t i = 0, + size_t j = 0) const; + + bool IsRuntime() const override { return true; } + + protected: + DDim GetDim(const std::string& name) const override; + void SetDim(const std::string& name, const DDim& dim) override; + + std::vector GetRepeatedDims(const std::string& name) const override { + PADDLE_THROW("Only compile time support this method"); + } + void SetRepeatedDims(const std::string& name, + const std::vector& dims) override { + PADDLE_THROW("Only compile time support this method"); + } + + proto::VarType::Type GetVarType(const std::string& name) const override { + auto* var = scope_.FindVar(name); + return ToVarType(var->Type()); + } + + InferShapeVarPtr GetVarPtr(const std::string& name) override { + return scope_.FindVar(name); + } + + private: + const OperatorBase& op_; + const Scope& scope_; +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/operators/fusion_lstm_op.cc b/paddle/fluid/operators/fusion_lstm_op.cc index ef23ab3f98..ae9d5d78ae 100644 --- a/paddle/fluid/operators/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fusion_lstm_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fusion_lstm_op.h" #include +#include "paddle/fluid/framework/shape_runtime_infer.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -24,26 +25,54 @@ namespace paddle { namespace operators { void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("WeightX"), - "Input(WeightX) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("WeightH"), - "Input(WeightH) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Bias"), - "Input(Bias) of LSTM should not be null."); - - PADDLE_ENFORCE(ctx->HasOutput("XX"), - "Output(XX) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Hidden"), - "Output(Hidden) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Cell"), - "Output(Cell) of LSTM should not be null."); + auto* runtime_ctx = dynamic_cast(ctx); + if (runtime_ctx == nullptr) { + LOG(FATAL) << "Should have runtime infer context"; + } + const auto& ins = runtime_ctx->OpBase().Inputs(); + const auto& outs = runtime_ctx->OpBase().Outputs(); + const auto& scope = runtime_ctx->InferScope(); + const auto ins_end = ins.end(); + const auto outs_end = outs.end(); + auto fair_input = [&](const std::string& name) -> bool { + auto it = ins.find(name); + if (it == ins_end) { + return false; + } + const auto& in = it->second; + if (in.size() != 1 || in[0] == framework::kEmptyVarName) { + return false; + } + return scope.FindVar(in[0]) != nullptr; + }; + auto fair_output = [&](const std::string& name) -> bool { + auto it = outs.find(name); + if (it == outs_end) { + return false; + } + const auto& out = it->second; + if (out.size() != 1 || out[0] == framework::kEmptyVarName) { + return false; + } + return scope.FindVar(out[0]) != nullptr; + }; + + PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of LSTM."); + PADDLE_ENFORCE(fair_input("WeightX"), + "Assert only one Input(WeightX) of LSTM."); + PADDLE_ENFORCE(fair_input("WeightH"), + "Assert only one Input(WeightH) of LSTM."); + PADDLE_ENFORCE(fair_input("Bias"), "Assert only one Input(Bias) of LSTM."); + PADDLE_ENFORCE(fair_output("XX"), "Assert only one Output(XX) of LSTM."); + PADDLE_ENFORCE(fair_output("Hidden"), + "Assert only one Output(Hidden) of LSTM."); + PADDLE_ENFORCE(fair_output("Cell"), "Assert only one Output(Cell) of LSTM."); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank must be 2."); - if (ctx->HasInput("H0")) { - PADDLE_ENFORCE(ctx->HasInput("C0"), + if (fair_input("H0")) { + PADDLE_ENFORCE(fair_input("C0"), "Input(Cell) and Input(Hidden) of LSTM should not " "be null at the same time."); auto h_dims = ctx->GetInputDim("H0"); @@ -95,16 +124,16 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { xx_width = wx_dims[1]; } else { xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1]; - PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), - "Output(BatchedInput) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"), - "Output(BatchedHidden) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"), - "Output(BatchedCell) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), - "Output(ReorderedH0) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"), - "Output(ReorderedC0) of LSTM should not be null."); + PADDLE_ENFORCE(fair_output("BatchedInput"), + "Assert only one Output(BatchedInput) of LSTM."); + PADDLE_ENFORCE(fair_output("BatchedHidden"), + "Assert only one Output(BatchedHidden) of LSTM."); + PADDLE_ENFORCE(fair_output("BatchedCell"), + "Assert only one Output(BatchedCell) of LSTM."); + PADDLE_ENFORCE(fair_output("ReorderedH0"), + "Assert only one Output(ReorderedH0) of LSTM"); + PADDLE_ENFORCE(fair_output("ReorderedC0"), + "Assert only one Output(ReorderedC0) of LSTM."); ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); ctx->SetOutputDim("BatchedHidden", out_dims); ctx->SetOutputDim("BatchedCell", out_dims); From a5556d44175931682bb049451639948c0da7ed6e Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 11 Sep 2018 17:49:54 +0800 Subject: [PATCH 04/17] refine attentionlstm infershape --- paddle/fluid/operators/attention_lstm_op.cc | 88 ++++++++++++++------- 1 file changed, 60 insertions(+), 28 deletions(-) diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index 39b0c85699..ac4ddb5502 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/attention_lstm_op.h" #include +#include "paddle/fluid/framework/shape_runtime_infer.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -23,29 +24,60 @@ namespace paddle { namespace operators { void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("C0"), - "Input(C0) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("LSTMWeight"), - "Input(LSTMWeight) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("LSTMBias"), - "Input(LSTMBias) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasInput("AttentionWeight"), - "Input(AttentionWeight) of AttentionLSTM should not be null."); - - PADDLE_ENFORCE(ctx->HasOutput("Hidden"), - "Output(Hidden) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Cell"), - "Output(Cell) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("AttentionedX"), - "Output(AttentionedX) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("AttentionFCOut"), - "Output(AttentionFCOut) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("LSTMX"), - "Output(LSTMX) of AttentionLSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("LSTMOUT"), - "Output(LSTMOUT) of AttentionLSTM should not be null."); + auto* runtime_ctx = dynamic_cast(ctx); + if (runtime_ctx == nullptr) { + LOG(FATAL) << "Should have runtime infer context"; + } + const auto& ins = runtime_ctx->OpBase().Inputs(); + const auto& outs = runtime_ctx->OpBase().Outputs(); + const auto& scope = runtime_ctx->InferScope(); + const auto ins_end = ins.end(); + const auto outs_end = outs.end(); + auto fair_input = [&](const std::string& name) -> bool { + auto it = ins.find(name); + if (it == ins_end) { + return false; + } + const auto& in = it->second; + if (in.size() != 1 || in[0] == framework::kEmptyVarName) { + return false; + } + return scope.FindVar(in[0]) != nullptr; + }; + auto fair_output = [&](const std::string& name) -> bool { + auto it = outs.find(name); + if (it == outs_end) { + return false; + } + const auto& out = it->second; + if (out.size() != 1 || out[0] == framework::kEmptyVarName) { + return false; + } + return scope.FindVar(out[0]) != nullptr; + }; + + PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of AttentionLSTM."); + PADDLE_ENFORCE(fair_input("C0"), + "Assert only one Input(C0) of AttentionLSTM."); + PADDLE_ENFORCE(fair_input("LSTMWeight"), + "Assert only one Input(LSTMWeight) of AttentionLSTM."); + PADDLE_ENFORCE(fair_input("LSTMBias"), + "Assert only one Input(LSTMBias) of AttentionLSTM."); + PADDLE_ENFORCE(fair_input("AttentionWeight"), + "Assert only one Input(AttentionWeight) of AttentionLSTM."); + + PADDLE_ENFORCE(fair_output("Hidden"), + "Assert only one Output(Hidden) of AttentionLSTM."); + PADDLE_ENFORCE(fair_output("Cell"), + "Assert only one Output(Cell) of AttentionLSTM."); + PADDLE_ENFORCE(fair_output("AttentionedX"), + "Assert only one Output(AttentionedX) of AttentionLSTM."); + PADDLE_ENFORCE(fair_output("AttentionFCOut"), + "Assert only one Output(AttentionFCOut) of AttentionLSTM."); + PADDLE_ENFORCE(fair_output("LSTMX"), + "Assert only one Output(LSTMX) of AttentionLSTM."); + PADDLE_ENFORCE(fair_output("LSTMOUT"), + "Assert only one Output(LSTMOUT) of AttentionLSTM."); auto x_dims = ctx->GetInputDim("X"); const int M = x_dims[1]; @@ -65,7 +97,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ(c_dims.size(), 2, "Input(C0)'s rank must be 2."); PADDLE_ENFORCE_EQ(c_dims[1], D, "C0 dims should be N x %d.", D); - if (ctx->HasInput("H0")) { + if (fair_input("H0")) { auto h_dims = ctx->GetInputDim("H0"); PADDLE_ENFORCE(h_dims == c_dims, "The dimension of Input(H0) and Input(C0) " @@ -79,7 +111,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "AttentionWeight shapes must be (%d + %d) * 1.", M, D); PADDLE_ENFORCE_EQ(atten_w_dims[1], 1, "AttentionWeight shapes must be (%d + %d) * 1.", M, D); - if (ctx->HasInput("AttentionBias")) { + if (fair_input("AttentionBias")) { auto atten_b_dims = ctx->GetInputDim("AttentionBias"); PADDLE_ENFORCE_EQ(atten_b_dims.size(), 2, "Input(AttentionBias)'s rank must be 2."); @@ -89,7 +121,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "AttentionBias shapes must be 1 * 1."); } - if (ctx->HasInput("AttentionScalar")) { + if (fair_input("AttentionScalar")) { auto dims = ctx->GetInputDim("AttentionScalar"); PADDLE_ENFORCE_EQ(dims.size(), 2, "Input(AttentionScalar)'s rank must be 2."); @@ -97,10 +129,10 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(dims[1], 1, "AttentionScalar shapes must be 1 * 1."); } - if (ctx->HasInput("AttentionScalarBias")) { + if (fair_input("AttentionScalarBias")) { auto dims = ctx->GetInputDim("AttentionScalarBias"); PADDLE_ENFORCE( - ctx->HasInput("AttentionScalar"), + fair_input("AttentionScalar"), "AttentionScalar should not be null when have AttentionScalarBias."); PADDLE_ENFORCE_EQ(dims.size(), 2, "Input(AttentionScalarBias)'s rank must be 2."); From 916f42bcbf7bc308f2135be5f341b8628cc883dc Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 11 Sep 2018 18:00:20 +0800 Subject: [PATCH 05/17] refine fusion gru infershape --- paddle/fluid/operators/fusion_gru_op.cc | 65 +++++++++++++++++++------ 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/operators/fusion_gru_op.cc b/paddle/fluid/operators/fusion_gru_op.cc index 916f84cb4a..bcdcb2ac4d 100644 --- a/paddle/fluid/operators/fusion_gru_op.cc +++ b/paddle/fluid/operators/fusion_gru_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fusion_gru_op.h" #include // for memcpy #include +#include "paddle/fluid/framework/shape_runtime_infer.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -25,14 +26,46 @@ namespace paddle { namespace operators { void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasInput("WeightX"), - "Input(WeightX) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasInput("WeightH"), - "Input(WeightH) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("XX"), "Output(XX) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Hidden"), - "Output(Hidden) of GRU should not be null."); + auto* runtime_ctx = dynamic_cast(ctx); + if (runtime_ctx == nullptr) { + LOG(FATAL) << "Should have runtime infer context"; + } + const auto& ins = runtime_ctx->OpBase().Inputs(); + const auto& outs = runtime_ctx->OpBase().Outputs(); + const auto& scope = runtime_ctx->InferScope(); + const auto ins_end = ins.end(); + const auto outs_end = outs.end(); + auto fair_input = [&](const std::string& name) -> bool { + auto it = ins.find(name); + if (it == ins_end) { + return false; + } + const auto& in = it->second; + if (in.size() != 1 || in[0] == framework::kEmptyVarName) { + return false; + } + return scope.FindVar(in[0]) != nullptr; + }; + auto fair_output = [&](const std::string& name) -> bool { + auto it = outs.find(name); + if (it == outs_end) { + return false; + } + const auto& out = it->second; + if (out.size() != 1 || out[0] == framework::kEmptyVarName) { + return false; + } + return scope.FindVar(out[0]) != nullptr; + }; + + PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of GRU."); + PADDLE_ENFORCE(fair_input("WeightX"), + "Assert only one Input(WeightX) of GRU."); + PADDLE_ENFORCE(fair_input("WeightH"), + "Assert only one Input(WeightH) of GRU."); + PADDLE_ENFORCE(fair_output("XX"), "Assert only one Output(XX) of GRU."); + PADDLE_ENFORCE(fair_output("Hidden"), + "Assert only one Output(Hidden) of GRU."); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank must be 2."); @@ -58,12 +91,12 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { "should be 3 * %d.", frame_size); - if (ctx->HasInput("H0")) { + if (fair_input("H0")) { auto h0_dims = ctx->GetInputDim("H0"); PADDLE_ENFORCE_EQ(h0_dims[1], frame_size, "The width of H0 must be equal to frame_size."); } - if (ctx->HasInput("Bias")) { + if (fair_input("Bias")) { auto b_dims = ctx->GetInputDim("Bias"); PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2."); PADDLE_ENFORCE_EQ(b_dims[0], 1, @@ -79,12 +112,12 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { xx_width = wx_dims[1]; } else { xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1]; - PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), - "Output(ReorderedH0) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), - "Output(BatchedInput) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedOut"), - "Output(BatchedOut) of GRU should not be null."); + PADDLE_ENFORCE(fair_output("ReorderedH0"), + "Assert only one Output(ReorderedH0) of GRU."); + PADDLE_ENFORCE(fair_output("BatchedInput"), + "Assert only one Output(BatchedInput) of GRU."); + PADDLE_ENFORCE(fair_output("BatchedOut"), + "Assert only one Output(BatchedOut) of GRU."); ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); ctx->SetOutputDim("BatchedOut", out_dims); } From 8e0fe035d478a8bfb7bea888b986eafa827dcbf1 Mon Sep 17 00:00:00 2001 From: superjomn Date: Tue, 11 Sep 2018 10:16:19 +0000 Subject: [PATCH 06/17] fix ner_test when bs>1 --- paddle/fluid/inference/tests/api/analyzer_ner_tester.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index 661b047ed7..6e8e43add7 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -144,8 +144,9 @@ void TestChineseNERPrediction(bool use_analysis) { size_t num_samples; for (int i = 0; i < FLAGS_repeat; i++) { DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + // Just one batch, the num_samples remains the same. num_samples = data.num_samples; - for (size_t bid = 0; bid < num_samples; ++bid) { + for (size_t bid = 0; bid < num_samples / FLAGS_batch_size; ++bid) { PrepareInputs(&input_slots, &data, FLAGS_batch_size); timer.tic(); predictor->Run(input_slots, &outputs); From 8a1abe54d797de7c4f17ab92d2268c3cebf83b66 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 11 Sep 2018 18:30:49 +0800 Subject: [PATCH 07/17] clean fusion infershape code --- paddle/fluid/operators/attention_lstm_op.cc | 35 +---------- paddle/fluid/operators/fusion_gru_op.cc | 35 +---------- .../operators/fusion_infershape_define.h | 60 +++++++++++++++++++ paddle/fluid/operators/fusion_lstm_op.cc | 35 +---------- 4 files changed, 66 insertions(+), 99 deletions(-) create mode 100644 paddle/fluid/operators/fusion_infershape_define.h diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index ac4ddb5502..7531aa9a46 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/attention_lstm_op.h" #include -#include "paddle/fluid/framework/shape_runtime_infer.h" +#include "paddle/fluid/operators/fusion_infershape_define.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -24,38 +24,7 @@ namespace paddle { namespace operators { void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { - auto* runtime_ctx = dynamic_cast(ctx); - if (runtime_ctx == nullptr) { - LOG(FATAL) << "Should have runtime infer context"; - } - const auto& ins = runtime_ctx->OpBase().Inputs(); - const auto& outs = runtime_ctx->OpBase().Outputs(); - const auto& scope = runtime_ctx->InferScope(); - const auto ins_end = ins.end(); - const auto outs_end = outs.end(); - auto fair_input = [&](const std::string& name) -> bool { - auto it = ins.find(name); - if (it == ins_end) { - return false; - } - const auto& in = it->second; - if (in.size() != 1 || in[0] == framework::kEmptyVarName) { - return false; - } - return scope.FindVar(in[0]) != nullptr; - }; - auto fair_output = [&](const std::string& name) -> bool { - auto it = outs.find(name); - if (it == outs_end) { - return false; - } - const auto& out = it->second; - if (out.size() != 1 || out[0] == framework::kEmptyVarName) { - return false; - } - return scope.FindVar(out[0]) != nullptr; - }; - + FUSION_INFERSHAPE_INIT; PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of AttentionLSTM."); PADDLE_ENFORCE(fair_input("C0"), "Assert only one Input(C0) of AttentionLSTM."); diff --git a/paddle/fluid/operators/fusion_gru_op.cc b/paddle/fluid/operators/fusion_gru_op.cc index bcdcb2ac4d..b10d311f05 100644 --- a/paddle/fluid/operators/fusion_gru_op.cc +++ b/paddle/fluid/operators/fusion_gru_op.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fusion_gru_op.h" #include // for memcpy #include -#include "paddle/fluid/framework/shape_runtime_infer.h" +#include "paddle/fluid/operators/fusion_infershape_define.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -26,38 +26,7 @@ namespace paddle { namespace operators { void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { - auto* runtime_ctx = dynamic_cast(ctx); - if (runtime_ctx == nullptr) { - LOG(FATAL) << "Should have runtime infer context"; - } - const auto& ins = runtime_ctx->OpBase().Inputs(); - const auto& outs = runtime_ctx->OpBase().Outputs(); - const auto& scope = runtime_ctx->InferScope(); - const auto ins_end = ins.end(); - const auto outs_end = outs.end(); - auto fair_input = [&](const std::string& name) -> bool { - auto it = ins.find(name); - if (it == ins_end) { - return false; - } - const auto& in = it->second; - if (in.size() != 1 || in[0] == framework::kEmptyVarName) { - return false; - } - return scope.FindVar(in[0]) != nullptr; - }; - auto fair_output = [&](const std::string& name) -> bool { - auto it = outs.find(name); - if (it == outs_end) { - return false; - } - const auto& out = it->second; - if (out.size() != 1 || out[0] == framework::kEmptyVarName) { - return false; - } - return scope.FindVar(out[0]) != nullptr; - }; - + FUSION_INFERSHAPE_INIT; PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of GRU."); PADDLE_ENFORCE(fair_input("WeightX"), "Assert only one Input(WeightX) of GRU."); diff --git a/paddle/fluid/operators/fusion_infershape_define.h b/paddle/fluid/operators/fusion_infershape_define.h new file mode 100644 index 0000000000..89521672b0 --- /dev/null +++ b/paddle/fluid/operators/fusion_infershape_define.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef PADDLE_FLUID_OPERATORS_FUSION_INFERSHAPE_DEFINE_H_ +#define PADDLE_FLUID_OPERATORS_FUSION_INFERSHAPE_DEFINE_H_ + +#include +#include "paddle/fluid/framework/shape_runtime_infer.h" + +namespace paddle { +namespace operators { + +#define FUSION_INFERSHAPE_INIT \ + auto* runtime_ctx = dynamic_cast(ctx); \ + if (runtime_ctx == nullptr) { \ + LOG(FATAL) << "Should have runtime infer context"; \ + } \ + const auto& ins = runtime_ctx->OpBase().Inputs(); \ + const auto& outs = runtime_ctx->OpBase().Outputs(); \ + const auto& scope = runtime_ctx->InferScope(); \ + const auto ins_end = ins.end(); \ + const auto outs_end = outs.end(); \ + auto fair_input = [&](const std::string& name) -> bool { \ + auto it = ins.find(name); \ + if (it == ins_end) { \ + return false; \ + } \ + const auto& in = it->second; \ + if (in.size() != 1 || in[0] == framework::kEmptyVarName) { \ + return false; \ + } \ + return scope.FindVar(in[0]) != nullptr; \ + }; \ + auto fair_output = [&](const std::string& name) -> bool { \ + auto it = outs.find(name); \ + if (it == outs_end) { \ + return false; \ + } \ + const auto& out = it->second; \ + if (out.size() != 1 || out[0] == framework::kEmptyVarName) { \ + return false; \ + } \ + return scope.FindVar(out[0]) != nullptr; \ + } + +} // namespace operators +} // namespace paddle + +#endif // PADDLE_FLUID_OPERATORS_FUSION_INFERSHAPE_DEFINE_H_ diff --git a/paddle/fluid/operators/fusion_lstm_op.cc b/paddle/fluid/operators/fusion_lstm_op.cc index ae9d5d78ae..08af98f850 100644 --- a/paddle/fluid/operators/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fusion_lstm_op.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fusion_lstm_op.h" #include -#include "paddle/fluid/framework/shape_runtime_infer.h" +#include "paddle/fluid/operators/fusion_infershape_define.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -25,38 +25,7 @@ namespace paddle { namespace operators { void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { - auto* runtime_ctx = dynamic_cast(ctx); - if (runtime_ctx == nullptr) { - LOG(FATAL) << "Should have runtime infer context"; - } - const auto& ins = runtime_ctx->OpBase().Inputs(); - const auto& outs = runtime_ctx->OpBase().Outputs(); - const auto& scope = runtime_ctx->InferScope(); - const auto ins_end = ins.end(); - const auto outs_end = outs.end(); - auto fair_input = [&](const std::string& name) -> bool { - auto it = ins.find(name); - if (it == ins_end) { - return false; - } - const auto& in = it->second; - if (in.size() != 1 || in[0] == framework::kEmptyVarName) { - return false; - } - return scope.FindVar(in[0]) != nullptr; - }; - auto fair_output = [&](const std::string& name) -> bool { - auto it = outs.find(name); - if (it == outs_end) { - return false; - } - const auto& out = it->second; - if (out.size() != 1 || out[0] == framework::kEmptyVarName) { - return false; - } - return scope.FindVar(out[0]) != nullptr; - }; - + FUSION_INFERSHAPE_INIT; PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of LSTM."); PADDLE_ENFORCE(fair_input("WeightX"), "Assert only one Input(WeightX) of LSTM."); From 7dd54afd0c7f328891fbb0df15e434aa9afba216 Mon Sep 17 00:00:00 2001 From: JiabinYang Date: Tue, 11 Sep 2018 12:12:46 +0000 Subject: [PATCH 08/17] fix program desc unit test error --- paddle/fluid/framework/program_desc_test.cc | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/program_desc_test.cc b/paddle/fluid/framework/program_desc_test.cc index 925ea98dbe..7e689a37da 100644 --- a/paddle/fluid/framework/program_desc_test.cc +++ b/paddle/fluid/framework/program_desc_test.cc @@ -87,8 +87,17 @@ TEST(ProgramDesc, copy_ctor) { ASSERT_EQ(op_origin->Inputs(), op_copy->Inputs()); ASSERT_EQ(op_origin->Outputs(), op_copy->Outputs()); - ASSERT_EQ(op_copy->Proto()->SerializeAsString(), - op_origin->Proto()->SerializeAsString()); + ASSERT_EQ(op_origin->Proto()->attrs().size(), + op_copy->Proto()->attrs().size()); + for (auto it = op_origin->Proto()->attrs().begin(); + it != op_origin->Proto()->attrs().end(); ++it) { + for (auto it_2 = op_copy->Proto()->attrs().begin(); + it_2 != op_copy->Proto()->attrs().end(); ++it_2) { + if (it->name() == it_2->name()) { + ASSERT_TRUE(it_2->SerializeAsString() == it->SerializeAsString()); + } + } + } if (op->Type() == "op_with_subblock") { ASSERT_EQ(1, op->GetBlockAttrId("sub_block")); From 8bb824bb93629fbf69d7e93ffc0dca85e726300c Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 12 Sep 2018 00:06:58 +0800 Subject: [PATCH 09/17] refine infershape hasinput and hasoutput --- paddle/fluid/framework/operator.cc | 274 ++++++++++-------- paddle/fluid/framework/shape_runtime_infer.h | 86 ------ paddle/fluid/operators/attention_lstm_op.cc | 35 ++- paddle/fluid/operators/fusion_gru_op.cc | 22 +- .../operators/fusion_infershape_define.h | 60 ---- paddle/fluid/operators/fusion_lstm_op.cc | 31 +- 6 files changed, 197 insertions(+), 311 deletions(-) delete mode 100644 paddle/fluid/framework/shape_runtime_infer.h delete mode 100644 paddle/fluid/operators/fusion_infershape_define.h diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 36025db7ba..bbd141cb3b 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -21,7 +21,6 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/shape_inference.h" -#include "paddle/fluid/framework/shape_runtime_infer.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/platform/profiler.h" @@ -459,147 +458,184 @@ bool OpSupportGPU(const std::string& op_type) { return false; } -bool RuntimeInferShapeContext::HasInput(const std::string& name) const { - if (!op_.HasInputs(name)) { - return false; - } - auto& ins = Inputs(name); - size_t length = ins.size(); - if (length == 0) { - return false; - } - PADDLE_ENFORCE_EQ(length, 1UL, - "Input %s should not have more than one inputs", name); - auto ipt = ins[0]; - auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt); - return var != nullptr; -} +class RuntimeInferShapeContext : public InferShapeContext { + public: + RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope) + : op_(op), scope_(scope) {} -bool RuntimeInferShapeContext::HasOutput(const std::string& name) const { - if (!op_.HasOutputs(name)) { - return false; - } - auto& outs = Outputs(name); - size_t length = outs.size(); - if (length == 0) { - return false; - } - PADDLE_ENFORCE_EQ(length, 1UL, - "Output %s should not have more than one inputs", name); - auto ipt = outs[0]; - auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt); - return var != nullptr; -} + bool HasInput(const std::string& name) const override { + // has only one input + const auto& ins = op_.Inputs(); + auto it = ins.find(name); + if (it == ins.end()) { + return false; + } + const auto& in = it->second; -bool RuntimeInferShapeContext::HasInputs(const std::string& name) const { - if (!op_.HasInputs(name)) { - return false; - } - auto inputs = op_.Inputs(name); - if (inputs.empty()) { - return false; - } - for (auto& input : inputs) { - if (scope_.FindVar(input) == nullptr) { + if (in.size() != 1 || in[0] == kEmptyVarName) { return false; } + return scope_.FindVar(in[0]) != nullptr; } - return true; -} -bool RuntimeInferShapeContext::HasOutputs(const std::string& name) const { - if (!op_.HasOutputs(name)) { - return false; + bool HasOutput(const std::string& name) const override { + // has only one output + const auto& outs = op_.Outputs(); + auto it = outs.find(name); + if (it == outs.end()) { + return false; + } + const auto& out = it->second; + if (out.size() != 1 || out[0] == kEmptyVarName) { + return false; + } + return scope_.FindVar(out[0]) != nullptr; } - auto outputs = op_.Outputs(name); - if (outputs.empty()) { - return false; + + bool HasInputs(const std::string& name) const override { + if (!op_.HasInputs(name)) { + return false; + } + auto inputs = op_.Inputs(name); + if (inputs.empty()) { + return false; + } + for (auto& input : inputs) { + if (scope_.FindVar(input) == nullptr) { + return false; + } + } + return true; } - for (auto& output : outputs) { - if (scope_.FindVar(output) == nullptr) { + + bool HasOutputs(const std::string& name) const override { + if (!op_.HasOutputs(name)) { + return false; + } + auto outputs = op_.Outputs(name); + if (outputs.empty()) { return false; } + for (auto& output : outputs) { + if (scope_.FindVar(output) == nullptr) { + return false; + } + } + return true; } - return true; -} -void RuntimeInferShapeContext::ShareLoD(const std::string& in, - const std::string& out, size_t i, - size_t j) const { - PADDLE_ENFORCE_LT(i, Inputs(in).size()); - PADDLE_ENFORCE_LT(j, Outputs(out).size()); - Variable* in_var = scope_.FindVar(Inputs(in)[i]); - Variable* out_var = scope_.FindVar(Outputs(out)[j]); - if (!in_var->IsType()) return; - PADDLE_ENFORCE(out_var->IsType(), - "The %d-th output of Output(%s) must be LoDTensor.", j, out); - auto in_tensor = in_var->Get(); - auto* out_tensor = out_var->GetMutable(); - out_tensor->set_lod(in_tensor.lod()); + AttrReader Attrs() const override { return AttrReader(op_.Attrs()); } + + const std::vector& Inputs( + const std::string& name) const override { + return op_.Inputs(name); + } + + const std::vector& Outputs( + const std::string& name) const override { + return op_.Outputs(name); + } + + void ShareLoD(const std::string& in, const std::string& out, size_t i = 0, + size_t j = 0) const override { + PADDLE_ENFORCE_LT(i, Inputs(in).size()); + PADDLE_ENFORCE_LT(j, Outputs(out).size()); + Variable* in_var = scope_.FindVar(Inputs(in)[i]); + Variable* out_var = scope_.FindVar(Outputs(out)[j]); + if (!in_var->IsType()) return; + PADDLE_ENFORCE(out_var->IsType(), + "The %d-th output of Output(%s) must be LoDTensor.", j, out); + auto in_tensor = in_var->Get(); + auto* out_tensor = out_var->GetMutable(); + out_tensor->set_lod(in_tensor.lod()); // TODO(dzhwinter) : reuse ShareLoD in most operators. // Need to call ShareLayout explicitly in sequence related ops. // Shall we have a better method to shared info between in/out Tensor? #ifdef PADDLE_WITH_MKLDNN - // Fix me: ugly workaround below - // Correct solution: - // set_layout() should NOT be called here (i.e. ShareLoD). Instead, - // layout of output tensor should be set "manually" in Compute() - // of each OPKernel. The reason layout should NOT be shared between - // input and output "automatically" (now by InferShape()->ShareLoD()) - // is that layout transform may occur after InferShape(). - // Workaround: - // Skip set_layout() when input layout is kMKLDNN - // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN - // OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called - // in Compute() - if (in_tensor.layout() != DataLayout::kMKLDNN) + // Fix me: ugly workaround below + // Correct solution: + // set_layout() should NOT be called here (i.e. ShareLoD). Instead, + // layout of output tensor should be set "manually" in Compute() + // of each OPKernel. The reason layout should NOT be shared between + // input and output "automatically" (now by InferShape()->ShareLoD()) + // is that layout transform may occur after InferShape(). + // Workaround: + // Skip set_layout() when input layout is kMKLDNN + // This is to avoid kMKLDNN is populated wrongly into a non-MKLDNN + // OPKernel. In all MKLDNN OPkernel, set_layout(kMKLDNN) should be called + // in Compute() + if (in_tensor.layout() != DataLayout::kMKLDNN) #endif + out_tensor->set_layout(in_tensor.layout()); + } + + void ShareLayout(const std::string& in, const std::string& out, size_t i = 0, + size_t j = 0) const { + PADDLE_ENFORCE_LT(i, Inputs(in).size()); + PADDLE_ENFORCE_LT(j, Outputs(out).size()); + Variable* in_var = scope_.FindVar(Inputs(in)[i]); + Variable* out_var = scope_.FindVar(Outputs(out)[j]); + if (!in_var->IsType()) return; + PADDLE_ENFORCE(out_var->IsType(), + "The %d-th output of Output(%s) must be LoDTensor.", j, out); + auto in_tensor = in_var->Get(); + auto* out_tensor = out_var->GetMutable(); out_tensor->set_layout(in_tensor.layout()); -} + } -void RuntimeInferShapeContext::ShareLayout(const std::string& in, - const std::string& out, size_t i, - size_t j) const { - PADDLE_ENFORCE_LT(i, Inputs(in).size()); - PADDLE_ENFORCE_LT(j, Outputs(out).size()); - Variable* in_var = scope_.FindVar(Inputs(in)[i]); - Variable* out_var = scope_.FindVar(Outputs(out)[j]); - if (!in_var->IsType()) return; - PADDLE_ENFORCE(out_var->IsType(), - "The %d-th output of Output(%s) must be LoDTensor.", j, out); - auto in_tensor = in_var->Get(); - auto* out_tensor = out_var->GetMutable(); - out_tensor->set_layout(in_tensor.layout()); -} - -DDim RuntimeInferShapeContext::GetDim(const std::string& name) const { - Variable* var = scope_.FindVar(name); - PADDLE_ENFORCE_NOT_NULL(var); - if (var->IsType()) { - return var->Get().dims(); - } else if (var->IsType()) { - return var->Get().GetCompleteDims(); - } else { - PADDLE_THROW( - "Only LoDTensor/SelectedRows support 'GetDim', but Variable %s's " - "type_id is %s.", - name, var->Type().name()); + bool IsRuntime() const override { return true; } + + protected: + DDim GetDim(const std::string& name) const override { + Variable* var = scope_.FindVar(name); + PADDLE_ENFORCE_NOT_NULL(var); + if (var->IsType()) { + return var->Get().dims(); + } else if (var->IsType()) { + return var->Get().GetCompleteDims(); + } else { + PADDLE_THROW( + "Only LoDTensor/SelectedRows support 'GetDim', but Variable %s's " + "type_id is %s.", + name, var->Type().name()); + } } -} -void RuntimeInferShapeContext::SetDim(const std::string& name, - const DDim& dim) { - Variable* var = scope_.FindVar(name); - if (var->IsType()) { - var->GetMutable()->Resize(dim); - } else if (var->IsType()) { - var->GetMutable()->set_height(dim[0]); - } else { - PADDLE_THROW("Variable %s type_id %s, expect LoDTensor/SelectedRows.", name, - var->Type().name()); + std::vector GetRepeatedDims(const std::string& name) const override { + PADDLE_THROW("Only compile time support this method"); } -} + + void SetDim(const std::string& name, const DDim& dim) override { + Variable* var = scope_.FindVar(name); + if (var->IsType()) { + var->GetMutable()->Resize(dim); + } else if (var->IsType()) { + var->GetMutable()->set_height(dim[0]); + } else { + PADDLE_THROW("Variable %s type_id %s, expect LoDTensor/SelectedRows.", + name, var->Type().name()); + } + } + + void SetRepeatedDims(const std::string& name, + const std::vector& dims) override { + PADDLE_THROW("Only compile time support this method"); + } + + proto::VarType::Type GetVarType(const std::string& name) const override { + auto* var = scope_.FindVar(name); + return ToVarType(var->Type()); + } + + InferShapeVarPtr GetVarPtr(const std::string& name) override { + return scope_.FindVar(name); + } + + private: + const OperatorBase& op_; + const Scope& scope_; +}; static void CheckTensorNANOrInf(const std::string& name, const framework::Tensor& tensor) { diff --git a/paddle/fluid/framework/shape_runtime_infer.h b/paddle/fluid/framework/shape_runtime_infer.h deleted file mode 100644 index 04d4e33f7a..0000000000 --- a/paddle/fluid/framework/shape_runtime_infer.h +++ /dev/null @@ -1,86 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/shape_inference.h" -#include "paddle/fluid/framework/var_type.h" - -namespace paddle { -namespace framework { - -class RuntimeInferShapeContext : public InferShapeContext { - public: - RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope) - : op_(op), scope_(scope) {} - - bool HasInput(const std::string& name) const override; - bool HasOutput(const std::string& name) const override; - bool HasInputs(const std::string& name) const override; - bool HasOutputs(const std::string& name) const override; - - const OperatorBase& OpBase() const { return op_; } - - const Scope& InferScope() const { return scope_; } - AttrReader Attrs() const override { return AttrReader(op_.Attrs()); } - - const std::vector& Inputs( - const std::string& name) const override { - return op_.Inputs(name); - } - - const std::vector& Outputs( - const std::string& name) const override { - return op_.Outputs(name); - } - - void ShareLoD(const std::string& in, const std::string& out, size_t i = 0, - size_t j = 0) const override; - - void ShareLayout(const std::string& in, const std::string& out, size_t i = 0, - size_t j = 0) const; - - bool IsRuntime() const override { return true; } - - protected: - DDim GetDim(const std::string& name) const override; - void SetDim(const std::string& name, const DDim& dim) override; - - std::vector GetRepeatedDims(const std::string& name) const override { - PADDLE_THROW("Only compile time support this method"); - } - void SetRepeatedDims(const std::string& name, - const std::vector& dims) override { - PADDLE_THROW("Only compile time support this method"); - } - - proto::VarType::Type GetVarType(const std::string& name) const override { - auto* var = scope_.FindVar(name); - return ToVarType(var->Type()); - } - - InferShapeVarPtr GetVarPtr(const std::string& name) override { - return scope_.FindVar(name); - } - - private: - const OperatorBase& op_; - const Scope& scope_; -}; - -} // namespace framework -} // namespace paddle diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index 7531aa9a46..9b943440a8 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "paddle/fluid/operators/attention_lstm_op.h" #include -#include "paddle/fluid/operators/fusion_infershape_define.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -24,28 +23,28 @@ namespace paddle { namespace operators { void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { - FUSION_INFERSHAPE_INIT; - PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of AttentionLSTM."); - PADDLE_ENFORCE(fair_input("C0"), + PADDLE_ENFORCE(ctx->HasInput("X"), + "Assert only one Input(X) of AttentionLSTM."); + PADDLE_ENFORCE(ctx->HasInput("C0"), "Assert only one Input(C0) of AttentionLSTM."); - PADDLE_ENFORCE(fair_input("LSTMWeight"), + PADDLE_ENFORCE(ctx->HasInput("LSTMWeight"), "Assert only one Input(LSTMWeight) of AttentionLSTM."); - PADDLE_ENFORCE(fair_input("LSTMBias"), + PADDLE_ENFORCE(ctx->HasInput("LSTMBias"), "Assert only one Input(LSTMBias) of AttentionLSTM."); - PADDLE_ENFORCE(fair_input("AttentionWeight"), + PADDLE_ENFORCE(ctx->HasInput("AttentionWeight"), "Assert only one Input(AttentionWeight) of AttentionLSTM."); - PADDLE_ENFORCE(fair_output("Hidden"), + PADDLE_ENFORCE(ctx->HasOutput("Hidden"), "Assert only one Output(Hidden) of AttentionLSTM."); - PADDLE_ENFORCE(fair_output("Cell"), + PADDLE_ENFORCE(ctx->HasOutput("Cell"), "Assert only one Output(Cell) of AttentionLSTM."); - PADDLE_ENFORCE(fair_output("AttentionedX"), + PADDLE_ENFORCE(ctx->HasOutput("AttentionedX"), "Assert only one Output(AttentionedX) of AttentionLSTM."); - PADDLE_ENFORCE(fair_output("AttentionFCOut"), + PADDLE_ENFORCE(ctx->HasOutput("AttentionFCOut"), "Assert only one Output(AttentionFCOut) of AttentionLSTM."); - PADDLE_ENFORCE(fair_output("LSTMX"), + PADDLE_ENFORCE(ctx->HasOutput("LSTMX"), "Assert only one Output(LSTMX) of AttentionLSTM."); - PADDLE_ENFORCE(fair_output("LSTMOUT"), + PADDLE_ENFORCE(ctx->HasOutput("LSTMOUT"), "Assert only one Output(LSTMOUT) of AttentionLSTM."); auto x_dims = ctx->GetInputDim("X"); @@ -66,7 +65,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { auto c_dims = ctx->GetInputDim("C0"); PADDLE_ENFORCE_EQ(c_dims.size(), 2, "Input(C0)'s rank must be 2."); PADDLE_ENFORCE_EQ(c_dims[1], D, "C0 dims should be N x %d.", D); - if (fair_input("H0")) { + if (ctx->HasInput("H0")) { auto h_dims = ctx->GetInputDim("H0"); PADDLE_ENFORCE(h_dims == c_dims, "The dimension of Input(H0) and Input(C0) " @@ -80,7 +79,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "AttentionWeight shapes must be (%d + %d) * 1.", M, D); PADDLE_ENFORCE_EQ(atten_w_dims[1], 1, "AttentionWeight shapes must be (%d + %d) * 1.", M, D); - if (fair_input("AttentionBias")) { + if (ctx->HasInput("AttentionBias")) { auto atten_b_dims = ctx->GetInputDim("AttentionBias"); PADDLE_ENFORCE_EQ(atten_b_dims.size(), 2, "Input(AttentionBias)'s rank must be 2."); @@ -90,7 +89,7 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "AttentionBias shapes must be 1 * 1."); } - if (fair_input("AttentionScalar")) { + if (ctx->HasInput("AttentionScalar")) { auto dims = ctx->GetInputDim("AttentionScalar"); PADDLE_ENFORCE_EQ(dims.size(), 2, "Input(AttentionScalar)'s rank must be 2."); @@ -98,10 +97,10 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(dims[1], 1, "AttentionScalar shapes must be 1 * 1."); } - if (fair_input("AttentionScalarBias")) { + if (ctx->HasInput("AttentionScalarBias")) { auto dims = ctx->GetInputDim("AttentionScalarBias"); PADDLE_ENFORCE( - fair_input("AttentionScalar"), + ctx->HasInput("AttentionScalar"), "AttentionScalar should not be null when have AttentionScalarBias."); PADDLE_ENFORCE_EQ(dims.size(), 2, "Input(AttentionScalarBias)'s rank must be 2."); diff --git a/paddle/fluid/operators/fusion_gru_op.cc b/paddle/fluid/operators/fusion_gru_op.cc index b10d311f05..31e87d9113 100644 --- a/paddle/fluid/operators/fusion_gru_op.cc +++ b/paddle/fluid/operators/fusion_gru_op.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include "paddle/fluid/operators/fusion_gru_op.h" #include // for memcpy #include -#include "paddle/fluid/operators/fusion_infershape_define.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -26,14 +25,13 @@ namespace paddle { namespace operators { void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { - FUSION_INFERSHAPE_INIT; - PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of GRU."); - PADDLE_ENFORCE(fair_input("WeightX"), + PADDLE_ENFORCE(ctx->HasInput("X"), "Assert only one Input(X) of GRU."); + PADDLE_ENFORCE(ctx->HasInput("WeightX"), "Assert only one Input(WeightX) of GRU."); - PADDLE_ENFORCE(fair_input("WeightH"), + PADDLE_ENFORCE(ctx->HasInput("WeightH"), "Assert only one Input(WeightH) of GRU."); - PADDLE_ENFORCE(fair_output("XX"), "Assert only one Output(XX) of GRU."); - PADDLE_ENFORCE(fair_output("Hidden"), + PADDLE_ENFORCE(ctx->HasOutput("XX"), "Assert only one Output(XX) of GRU."); + PADDLE_ENFORCE(ctx->HasOutput("Hidden"), "Assert only one Output(Hidden) of GRU."); auto x_dims = ctx->GetInputDim("X"); @@ -60,12 +58,12 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { "should be 3 * %d.", frame_size); - if (fair_input("H0")) { + if (ctx->HasInput("H0")) { auto h0_dims = ctx->GetInputDim("H0"); PADDLE_ENFORCE_EQ(h0_dims[1], frame_size, "The width of H0 must be equal to frame_size."); } - if (fair_input("Bias")) { + if (ctx->HasInput("Bias")) { auto b_dims = ctx->GetInputDim("Bias"); PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2."); PADDLE_ENFORCE_EQ(b_dims[0], 1, @@ -81,11 +79,11 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { xx_width = wx_dims[1]; } else { xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1]; - PADDLE_ENFORCE(fair_output("ReorderedH0"), + PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), "Assert only one Output(ReorderedH0) of GRU."); - PADDLE_ENFORCE(fair_output("BatchedInput"), + PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), "Assert only one Output(BatchedInput) of GRU."); - PADDLE_ENFORCE(fair_output("BatchedOut"), + PADDLE_ENFORCE(ctx->HasOutput("BatchedOut"), "Assert only one Output(BatchedOut) of GRU."); ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); ctx->SetOutputDim("BatchedOut", out_dims); diff --git a/paddle/fluid/operators/fusion_infershape_define.h b/paddle/fluid/operators/fusion_infershape_define.h deleted file mode 100644 index 89521672b0..0000000000 --- a/paddle/fluid/operators/fusion_infershape_define.h +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef PADDLE_FLUID_OPERATORS_FUSION_INFERSHAPE_DEFINE_H_ -#define PADDLE_FLUID_OPERATORS_FUSION_INFERSHAPE_DEFINE_H_ - -#include -#include "paddle/fluid/framework/shape_runtime_infer.h" - -namespace paddle { -namespace operators { - -#define FUSION_INFERSHAPE_INIT \ - auto* runtime_ctx = dynamic_cast(ctx); \ - if (runtime_ctx == nullptr) { \ - LOG(FATAL) << "Should have runtime infer context"; \ - } \ - const auto& ins = runtime_ctx->OpBase().Inputs(); \ - const auto& outs = runtime_ctx->OpBase().Outputs(); \ - const auto& scope = runtime_ctx->InferScope(); \ - const auto ins_end = ins.end(); \ - const auto outs_end = outs.end(); \ - auto fair_input = [&](const std::string& name) -> bool { \ - auto it = ins.find(name); \ - if (it == ins_end) { \ - return false; \ - } \ - const auto& in = it->second; \ - if (in.size() != 1 || in[0] == framework::kEmptyVarName) { \ - return false; \ - } \ - return scope.FindVar(in[0]) != nullptr; \ - }; \ - auto fair_output = [&](const std::string& name) -> bool { \ - auto it = outs.find(name); \ - if (it == outs_end) { \ - return false; \ - } \ - const auto& out = it->second; \ - if (out.size() != 1 || out[0] == framework::kEmptyVarName) { \ - return false; \ - } \ - return scope.FindVar(out[0]) != nullptr; \ - } - -} // namespace operators -} // namespace paddle - -#endif // PADDLE_FLUID_OPERATORS_FUSION_INFERSHAPE_DEFINE_H_ diff --git a/paddle/fluid/operators/fusion_lstm_op.cc b/paddle/fluid/operators/fusion_lstm_op.cc index 08af98f850..55e465e3af 100644 --- a/paddle/fluid/operators/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fusion_lstm_op.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "paddle/fluid/operators/fusion_lstm_op.h" #include -#include "paddle/fluid/operators/fusion_infershape_define.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/operators/math/fc_compute.h" @@ -25,23 +24,23 @@ namespace paddle { namespace operators { void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { - FUSION_INFERSHAPE_INIT; - PADDLE_ENFORCE(fair_input("X"), "Assert only one Input(X) of LSTM."); - PADDLE_ENFORCE(fair_input("WeightX"), + PADDLE_ENFORCE(ctx->HasInput("X"), "Assert only one Input(X) of LSTM."); + PADDLE_ENFORCE(ctx->HasInput("WeightX"), "Assert only one Input(WeightX) of LSTM."); - PADDLE_ENFORCE(fair_input("WeightH"), + PADDLE_ENFORCE(ctx->HasInput("WeightH"), "Assert only one Input(WeightH) of LSTM."); - PADDLE_ENFORCE(fair_input("Bias"), "Assert only one Input(Bias) of LSTM."); - PADDLE_ENFORCE(fair_output("XX"), "Assert only one Output(XX) of LSTM."); - PADDLE_ENFORCE(fair_output("Hidden"), + PADDLE_ENFORCE(ctx->HasInput("Bias"), "Assert only one Input(Bias) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("XX"), "Assert only one Output(XX) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("Hidden"), "Assert only one Output(Hidden) of LSTM."); - PADDLE_ENFORCE(fair_output("Cell"), "Assert only one Output(Cell) of LSTM."); + PADDLE_ENFORCE(ctx->HasOutput("Cell"), + "Assert only one Output(Cell) of LSTM."); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank must be 2."); - if (fair_input("H0")) { - PADDLE_ENFORCE(fair_input("C0"), + if (ctx->HasInput("H0")) { + PADDLE_ENFORCE(ctx->HasInput("C0"), "Input(Cell) and Input(Hidden) of LSTM should not " "be null at the same time."); auto h_dims = ctx->GetInputDim("H0"); @@ -93,15 +92,15 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { xx_width = wx_dims[1]; } else { xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1]; - PADDLE_ENFORCE(fair_output("BatchedInput"), + PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), "Assert only one Output(BatchedInput) of LSTM."); - PADDLE_ENFORCE(fair_output("BatchedHidden"), + PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"), "Assert only one Output(BatchedHidden) of LSTM."); - PADDLE_ENFORCE(fair_output("BatchedCell"), + PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"), "Assert only one Output(BatchedCell) of LSTM."); - PADDLE_ENFORCE(fair_output("ReorderedH0"), + PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), "Assert only one Output(ReorderedH0) of LSTM"); - PADDLE_ENFORCE(fair_output("ReorderedC0"), + PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"), "Assert only one Output(ReorderedC0) of LSTM."); ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); ctx->SetOutputDim("BatchedHidden", out_dims); From 312e92ab072297dae3bf2baf6479b51bfc9b88e6 Mon Sep 17 00:00:00 2001 From: Shan Yi <35982308+shanyi15@users.noreply.github.com> Date: Wed, 12 Sep 2018 10:43:32 +0800 Subject: [PATCH 10/17] update-readme --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 60ffbe7281..45186ec4ef 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Our vision is to enable deep learning for everyone via PaddlePaddle. Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle. -### Latest PaddlePaddle Release: [Fluid 0.14.0](https://github.com/PaddlePaddle/Paddle/tree/v0.14.0) +### Latest PaddlePaddle Release: [Fluid 0.15.0](https://github.com/PaddlePaddle/Paddle/tree/v0.15.0) ### Install Latest Stable Release: ``` # Linux CPU @@ -76,26 +76,26 @@ pip install paddlepaddle-gpu==0.14.0.post85 ## Installation -It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/beginners_guide/install/install_doc.html) on our website. +It is recommended to read [this doc](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/install/install_doc.html) on our website. ## Documentation -We provide [English](http://paddlepaddle.org/documentation/docs/en/0.14.0/getstarted/index_en.html) and -[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/beginners_guide/index.html) documentation. +We provide [English](http://paddlepaddle.org/documentation/docs/en/0.15.0/getstarted/index_en.html) and +[Chinese](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/beginners_guide/index.html) documentation. - [Deep Learning 101](https://github.com/PaddlePaddle/book) You might want to start from this online interactive book that can run in a Jupyter Notebook. -- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/user_guides/howto/training/cluster_howto.html) +- [Distributed Training](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/user_guides/howto/training/cluster_howto.html) You can run distributed training jobs on MPI clusters. -- [Python API](http://paddlepaddle.org/documentation/api/zh/0.14.0/fluid.html) +- [Python API](http://paddlepaddle.org/documentation/api/zh/0.15.0/fluid.html) Our new API enables much shorter programs. -- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.14.0/new_docs/advanced_usage/development/contribute_to_paddle.html) +- [How to Contribute](http://paddlepaddle.org/documentation/docs/zh/0.15.0/new_docs/advanced_usage/development/contribute_to_paddle.html) We appreciate your contributions! From 36d6e44681c3ebe1ff3992b37a981ca468580080 Mon Sep 17 00:00:00 2001 From: JiabinYang Date: Wed, 12 Sep 2018 03:41:39 +0000 Subject: [PATCH 11/17] fix test_py_reader_using_executor error --- .../fluid/tests/unittests/test_py_reader_using_executor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py index 931cac409f..0fb9518a45 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py @@ -96,7 +96,8 @@ class TestPyReaderUsingExecutor(unittest.TestCase): self.queue_capacity = 50 def test(self): - for use_cuda in [False, True]: + for use_cuda in ([False, True] + if core.core.is_compiled_with_cuda() else [False]): for use_parallel_executor in [False, True]: for use_double_buffer in [False, True]: print('Test Parameters:'), From d61c11764af1249c8acc6937f2c25a8ae6c86c3e Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 12 Sep 2018 12:50:50 +0800 Subject: [PATCH 12/17] follow comment add enforce --- paddle/fluid/framework/operator.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index bbd141cb3b..b7fae7171a 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -471,10 +471,11 @@ class RuntimeInferShapeContext : public InferShapeContext { return false; } const auto& in = it->second; - - if (in.size() != 1 || in[0] == kEmptyVarName) { + if (in.size() == 0 || in[0] == kEmptyVarName) { return false; } + PADDLE_ENFORCE_EQ(in.size(), 1UL, + "Input %s should not have more than one inputs", name); return scope_.FindVar(in[0]) != nullptr; } @@ -486,9 +487,11 @@ class RuntimeInferShapeContext : public InferShapeContext { return false; } const auto& out = it->second; - if (out.size() != 1 || out[0] == kEmptyVarName) { + if (out.size() == 0 || out[0] == kEmptyVarName) { return false; } + PADDLE_ENFORCE_EQ(out.size(), 1UL, + "Output %s should not have more than one outputs", name); return scope_.FindVar(out[0]) != nullptr; } From d41176411fd4f5f06155c7c73264f1145ecccee7 Mon Sep 17 00:00:00 2001 From: Jiabin Yang Date: Wed, 12 Sep 2018 13:08:02 +0800 Subject: [PATCH 13/17] Update test_py_reader_using_executor.py --- .../fluid/tests/unittests/test_py_reader_using_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py index 0fb9518a45..b7fad9b3a6 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py @@ -97,7 +97,7 @@ class TestPyReaderUsingExecutor(unittest.TestCase): def test(self): for use_cuda in ([False, True] - if core.core.is_compiled_with_cuda() else [False]): + if core.is_compiled_with_cuda() else [False]): for use_parallel_executor in [False, True]: for use_double_buffer in [False, True]: print('Test Parameters:'), From bdd957b4be7a023426f76ae6e3153aa5a0e1686f Mon Sep 17 00:00:00 2001 From: JiabinYang Date: Wed, 12 Sep 2018 05:18:22 +0000 Subject: [PATCH 14/17] fix test_parallel_executor_transformer --- .../tests/unittests/test_parallel_executor_transformer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py index 5ad922725a..a55b2002ed 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_transformer.py @@ -20,6 +20,7 @@ import numpy as np from parallel_executor_test_base import TestParallelExecutorBase import unittest import paddle +import paddle.fluid.core as core import paddle.dataset.wmt16 as wmt16 import os @@ -170,7 +171,8 @@ class TestTransformer(TestParallelExecutorBase): writer.complete_append_tensor() def test_main(self): - self.check_network_convergence(transformer, use_cuda=True) + if core.is_compiled_with_cuda(): + self.check_network_convergence(transformer, use_cuda=True) self.check_network_convergence(transformer, use_cuda=False, iter=5) From 41de582bb092dfa67bd2a1fa5d3b469db1ae81e2 Mon Sep 17 00:00:00 2001 From: Sylwester Fraczek Date: Wed, 12 Sep 2018 10:22:11 +0200 Subject: [PATCH 15/17] create conv relu pass for MKLDNN (#13258) --- paddle/fluid/framework/ir/CMakeLists.txt | 6 + .../ir/conv_relu_mkldnn_fuse_pass.cc | 90 +++++++++++++++ .../framework/ir/conv_relu_mkldnn_fuse_pass.h | 39 +++++++ .../ir/conv_relu_mkldnn_fuse_pass_tester.cc | 108 ++++++++++++++++++ .../framework/ir/graph_pattern_detector.cc | 33 ++++++ .../framework/ir/graph_pattern_detector.h | 22 ++++ 6 files changed, 298 insertions(+) create mode 100644 paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc create mode 100644 paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h create mode 100644 paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index ce3ebed00b..7004f484a9 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -28,6 +28,9 @@ cc_library(graph_pattern_detector SRCS graph_pattern_detector.cc DEPS graph grap pass_library(graph_to_program_pass base) pass_library(graph_viz_pass base) pass_library(fc_fuse_pass inference) +if(WITH_MKLDNN) + pass_library(conv_relu_mkldnn_fuse_pass inference) +endif() pass_library(attention_lstm_fuse_pass inference) pass_library(infer_clean_graph_pass inference) pass_library(fc_lstm_fuse_pass inference) @@ -42,3 +45,6 @@ cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_r cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass) cc_test(test_graph_pattern_detector SRCS graph_pattern_detector_tester.cc DEPS graph_pattern_detector) cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass framework_proto) +if(WITH_MKLDNN) + cc_test(test_conv_relu_mkldnn_fuse_pass SRCS conv_relu_mkldnn_fuse_pass_tester.cc DEPS conv_relu_mkldnn_fuse_pass) +endif() diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc new file mode 100644 index 0000000000..4408cb45ac --- /dev/null +++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h" +#include +#include +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace framework { +namespace ir { + +std::unique_ptr ConvReLUFusePass::ApplyImpl( + std::unique_ptr graph) const { + PADDLE_ENFORCE(graph.get()); + FusePassBase::Init("conv_relu_mkldnn_fuse", graph.get()); + + std::unordered_set nodes2delete; + + GraphPatternDetector gpd; + auto* conv_input = gpd.mutable_pattern() + ->NewNode("conv_relu_mkldnn_fuse/conv_input") + ->AsInput() + ->assert_is_op_input("conv2d", "Input"); + patterns::ConvReLU conv_relu_pattern(gpd.mutable_pattern(), + "conv_relu_mkldnn_fuse"); + conv_relu_pattern(conv_input); + + int found_conv_relu_count = 0; + auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, + Graph* g) { + VLOG(4) << "handle ConvReLU fuse"; + GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, + conv_relu_pattern); // Filter + GET_IR_NODE_FROM_SUBGRAPH(conv_bias, conv_bias, conv_relu_pattern); // Bias + GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp + GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_relu_pattern); // CONV op + GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern); // Out + GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern); // ReLU op + + // Create an ConvReLU Node. + OpDesc desc; + std::string conv_relu_i_in = subgraph.at(conv_input)->Name(); + std::string conv_relu_w_in = conv_weight->Name(); + std::string conv_relu_b_in = conv_bias->Name(); + std::string conv_relu_out = relu_out->Name(); + desc.SetInput("Input", std::vector({conv_relu_i_in})); + desc.SetInput("Filter", std::vector({conv_relu_w_in})); + desc.SetInput("Bias", std::vector({conv_relu_b_in})); + desc.SetOutput("Out", std::vector({conv_relu_out})); + desc.SetType("conv2d"); + for (auto& attr : conv->Op()->GetAttrMap()) { + desc.SetAttr(attr.first, attr.second); + } + desc.SetAttr("fuse_relu", true); + auto conv_relu_node = g->CreateOpNode(&desc); // OpDesc will be copied. + GraphSafeRemoveNodes(graph.get(), {conv, relu, conv_out}); + + PADDLE_ENFORCE(subgraph.count(conv_input)); + IR_NODE_LINK_TO(subgraph.at(conv_input), conv_relu_node); + IR_NODE_LINK_TO(conv_weight, conv_relu_node); + IR_NODE_LINK_TO(conv_bias, conv_relu_node); + IR_NODE_LINK_TO(conv_relu_node, relu_out); + + found_conv_relu_count++; + }; + + gpd(graph.get(), handler); + + AddStatis(found_conv_relu_count); + return graph; +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(conv_relu_mkldnn_fuse_pass, + paddle::framework::ir::ConvReLUFusePass); diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h new file mode 100644 index 0000000000..b5de0d5487 --- /dev/null +++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h @@ -0,0 +1,39 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +/* + * Fuse the CONV and ReLU to a ConvReLUOp. + */ +class ConvReLUFusePass : public FusePassBase { + public: + virtual ~ConvReLUFusePass() {} + + protected: + std::unique_ptr ApplyImpl(std::unique_ptr graph) const; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc new file mode 100644 index 0000000000..82b5fa1886 --- /dev/null +++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.h" + +#include + +namespace paddle { +namespace framework { +namespace ir { + +void SetOp(ProgramDesc* prog, const std::string& type, + const std::vector& inputs, + const std::vector& outputs) { + auto* op = prog->MutableBlock(0)->AppendOp(); + op->SetType(type); + if (type == "conv2d") { + op->SetAttr("use_mkldnn", true); + op->SetInput("Input", {inputs[0]}); + op->SetInput("Filter", {inputs[1]}); + op->SetInput("Bias", {inputs[2]}); + } else if (type == "relu") { + op->SetInput("X", inputs); + } + op->SetOutput("Out", outputs); +} + +// a->OP0->b +// b->OP1->c +// (c, weights, bias)->conv->f +// (f)->relu->g +ProgramDesc BuildProgramDesc() { + ProgramDesc prog; + for (auto& v : + std::vector({"a", "b", "c", "weights", "bias", "f", "g"})) { + auto* var = prog.MutableBlock(0)->Var(v); + var->SetType(proto::VarType::SELECTED_ROWS); + if (v == "weights" || v == "bias") { + var->SetPersistable(true); + } + } + + SetOp(&prog, "OP0", std::vector({"a"}), + std::vector({"b"})); + SetOp(&prog, "OP1", std::vector({"b"}), + std::vector({"c"})); + SetOp(&prog, "conv2d", std::vector({"c", "weights", "bias"}), + std::vector({"f"})); + SetOp(&prog, "relu", std::vector({"f"}), + std::vector({"g"})); + + return prog; +} + +TEST(ConvReLUFusePass, basic) { + auto prog = BuildProgramDesc(); + + std::unique_ptr graph(new ir::Graph(prog)); + + auto pass = PassRegistry::Instance().Get("conv_relu_mkldnn_fuse_pass"); + + int original_nodes_num = graph->Nodes().size(); + + graph = pass->Apply(std::move(graph)); + + int current_nodes_num = graph->Nodes().size(); + + // Remove 3 Nodes: CONV, RELU, conv_out + // Add 1 Node: ConvReLU + EXPECT_EQ(original_nodes_num - 2, current_nodes_num); + + // Assert conv_relu op in newly generated graph + int conv_relu_count = 0; + + for (auto* node : graph->Nodes()) { + if (node->IsOp() && node->Op()->Type() == "conv2d") { + if (node->Op()->HasAttr("use_mkldnn")) { + bool use_mkldnn = boost::get(node->Op()->GetAttr("use_mkldnn")); + if (use_mkldnn) { + if (node->Op()->HasAttr("fuse_relu")) { + bool fuse_relu = boost::get(node->Op()->GetAttr("fuse_relu")); + if (fuse_relu) { + ++conv_relu_count; + } + } + } + } + } + } + EXPECT_EQ(conv_relu_count, 1); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +USE_PASS(conv_relu_mkldnn_fuse_pass); diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 5825a129b7..11d5998aaf 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -522,6 +522,39 @@ bool VarLinksFromOp(Node* node, const std::string& op_type) { return false; } +PDNode* patterns::ConvReLU::operator()( + paddle::framework::ir::PDNode* conv_input) { + // Create Operators + conv_input->assert_is_op_input("conv2d", "Input"); + auto* conv_op = pattern->NewNode(conv_repr())->assert_is_op("conv2d"); + auto* relu_op = pattern->NewNode(relu_repr())->assert_is_op("relu"); + // Create variables + // Filter + auto* conv_weight_var = pattern->NewNode(conv_weight_repr()) + ->AsInput() + ->assert_is_persistable_var() + ->assert_is_op_input("conv2d", "Filter"); + // Bias + auto* conv_bias_var = pattern->NewNode(conv_bias_repr()) + ->AsInput() + ->assert_is_persistable_var() + ->assert_is_op_input("conv2d", "Bias"); + // intermediate variable, will be removed in the IR after fuse. + auto* conv_out_var = pattern->NewNode(conv_out_repr()) + ->AsIntermediate() + ->assert_is_only_output_of_op("conv2d") + ->assert_is_op_input("relu"); + // output + auto* relu_out_var = pattern->NewNode(relu_out_repr()) + ->AsOutput() + ->assert_is_op_output("relu"); + + conv_op->LinksFrom({conv_input, conv_weight_var, conv_bias_var}) + .LinksTo({conv_out_var}); + relu_op->LinksFrom({conv_out_var}).LinksTo({relu_out_var}); + return relu_out_var; +} + PDNode* patterns::FC::operator()(paddle::framework::ir::PDNode* x, bool with_bias) { // Create shared nodes. diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 57482a07b6..371384dc56 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -360,6 +360,28 @@ struct PatternBase { size_t id_; }; +// CONV with ReLU +// op: conv + relu +// named nodes: +// conv_input, conv_weight, +// conv_bias, conv_out, conv, +// relu_out, relu +struct ConvReLU : public PatternBase { + ConvReLU(PDPattern* pattern, const std::string& name_scope) + : PatternBase(pattern, name_scope, "conv_relu") {} + + PDNode* operator()(PDNode* conv_input); + + // declare operator node's name + PATTERN_DECL_NODE(conv); + PATTERN_DECL_NODE(relu); + // declare variable node's name + PATTERN_DECL_NODE(conv_weight); + PATTERN_DECL_NODE(conv_bias); + PATTERN_DECL_NODE(conv_out); + PATTERN_DECL_NODE(relu_out); +}; + // FC with bias // op: mul + elementwise_add // named nodes: From b12322ce959a2ab79a1bae4e7aaf9e4b42d56909 Mon Sep 17 00:00:00 2001 From: luotao1 Date: Wed, 12 Sep 2018 19:06:17 +0800 Subject: [PATCH 16/17] fix fusion_lstm unique_name bug --- paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc | 5 ++--- paddle/fluid/inference/analysis/ir_pass_manager.cc | 8 ++++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index f7fda87357..aa95d3e9f6 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -51,7 +51,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, if (with_fc_bias) { // Add FC-bias with LSTM-bias and create a new weight PADDLE_ENFORCE(scope); - const std::string& new_bias_var = name_scope + "_bias.new"; + const std::string& new_bias_var = patterns::UniqueKey("NewBias"); auto* bias_var = scope->Var(new_bias_var); PADDLE_ENFORCE(bias_var); auto* bias_tensor = bias_var->GetMutable(); @@ -120,7 +120,6 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) { - GET_IR_NODE_FROM_SUBGRAPH(lstm, lstm, lstm_pattern); GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, lstm_pattern); GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, lstm_pattern); @@ -136,7 +135,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, fc_bias); // Remove unneeded nodes. std::unordered_set marked_nodes( - {mul, lstm, elementwise_add}); + {mul, lstm, elementwise_add, fc_bias}); GraphSafeRemoveNodes(graph, marked_nodes); } else { GET_IR_NODE_FROM_SUBGRAPH(fc_out, mul_out, fc_pattern); diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 30c1e8e93d..e76708baf4 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/inference/analysis/ir_pass_manager.h" #include +#include #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/scope.h" @@ -37,13 +38,16 @@ IRPassManager::IRPassManager(const ProgramDesc &program, void IRPassManager::Apply(const std::vector &passes) { // Apply all the passes std::string pre_pass; + int pass_num = 0; for (const std::string &pass_name : passes) { PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass_name); auto pass = framework::ir::PassRegistry::Instance().Get(pass_name); if (pass_name == "graph_viz_pass") { - std::string dot_file_path = - "ir_" + (pre_pass.empty() ? "origin" : pre_pass) + ".dot"; + std::string dot_file_path = std::to_string(pass_num) + "_ir_" + + (pre_pass.empty() ? "origin" : pre_pass) + + ".dot"; pass->Set("graph_viz_path", new std::string(std::move(dot_file_path))); + pass_num++; } graph_ = pass->Apply(std::move(graph_)); pre_pass = pass_name; From e69d9c845b30d7150f122c41805b1bc5bf75136c Mon Sep 17 00:00:00 2001 From: Bai Yifan Date: Thu, 13 Sep 2018 09:49:22 +0800 Subject: [PATCH 17/17] code fix (#13365) --- paddle/fluid/operators/softmax_with_cross_entropy_op.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu index 148faec4af..a07c17348e 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cu @@ -31,7 +31,8 @@ __global__ void CrossEntropyGrad(T* logit_grad, const int64_t* labels, for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch_size; i += blockDim.x * gridDim.x) { int idx = i * class_num + labels[i]; - logit_grad[idx] -= static_cast(1.); + logit_grad[idx] -= + ignore_index == labels[i] ? static_cast(0.) : static_cast(1.); } }