From 8fd845e0fab40acc9539c4109feb3ed411f4dc8b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 30 Sep 2017 16:55:40 -0700 Subject: [PATCH 01/15] Unify Map in OpDescBind --- paddle/framework/op_desc.cc | 27 ++++++++++++++++++++++++++- paddle/framework/op_desc.h | 37 ++++++------------------------------- paddle/platform/enforce.h | 4 ++-- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index 0c12c55dc0..33a064890c 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -112,6 +112,30 @@ const std::unordered_map &OpDescBind::GetAttrMap() return attrs_; } +struct SetAttrDescVisitor : public boost::static_visitor { + explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {} + mutable OpDesc::Attr *attr_; + void operator()(int v) const { attr_->set_i(v); } + void operator()(float v) const { attr_->set_f(v); } + void operator()(const std::string &v) const { attr_->set_s(v); } + void operator()(bool b) const { attr_->set_b(b); } + + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_ints()); + } + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_floats()); + } + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_strings()); + } + void operator()(const std::vector &v) const { + VectorToRepeated(v, attr_->mutable_bools()); + } + void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->idx()); } + void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } +}; + void OpDescBind::Sync() { if (need_update_) { this->op_desc_.mutable_inputs()->Clear(); @@ -134,7 +158,8 @@ void OpDescBind::Sync() { attr_desc->set_name(attr.first); attr_desc->set_type( static_cast(attr.second.which() - 1)); - boost::apply_visitor(SetAttrDescVisitor(attr_desc), attr.second); + SetAttrDescVisitor visitor(attr_desc); + boost::apply_visitor(visitor, attr.second); } need_update_ 
= false; diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index 0cf7d13971..e03b4d067f 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include "paddle/framework/attribute.h" +#include "paddle/framework/op_info.h" #include "paddle/framework/var_desc.h" namespace paddle { @@ -61,48 +62,22 @@ class OpDescBind { void SetBlockAttr(const std::string &name, BlockDescBind &block); // Only be used in C++ - void SetAttrMap(const std::unordered_map &attr_map); + void SetAttrMap(const AttributeMap &attr_map); Attribute GetAttr(const std::string &name) const; int GetBlockAttr(const std::string &name) const; // Only be used in C++ - const std::unordered_map &GetAttrMap() const; + const AttributeMap &GetAttrMap() const; private: - struct SetAttrDescVisitor : public boost::static_visitor { - explicit SetAttrDescVisitor(OpDesc::Attr *attr) : attr_(attr) {} - mutable OpDesc::Attr *attr_; - void operator()(int v) const { attr_->set_i(v); } - void operator()(float v) const { attr_->set_f(v); } - void operator()(const std::string &v) const { attr_->set_s(v); } - void operator()(bool b) const { attr_->set_b(b); } - - void operator()(const std::vector &v) const { - VectorToRepeated(v, attr_->mutable_ints()); - } - void operator()(const std::vector &v) const { - VectorToRepeated(v, attr_->mutable_floats()); - } - void operator()(const std::vector &v) const { - VectorToRepeated(v, attr_->mutable_strings()); - } - void operator()(const std::vector &v) const { - VectorToRepeated(v, attr_->mutable_bools()); - } - void operator()(BlockDesc *desc) const { - attr_->set_block_idx(desc->idx()); - } - void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } - }; - void Sync(); OpDesc op_desc_; - std::unordered_map> inputs_; - std::unordered_map> outputs_; - std::unordered_map attrs_; + VariableNameMap inputs_; + VariableNameMap outputs_; + AttributeMap attrs_; // 
need_update_ indicate there some local changes not be synchronized. If // local changes should be synchronized, need_update_ should be set to true. diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index b523ef03c0..52bd23039b 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -185,7 +185,7 @@ inline void throw_on_error(T e) { std::make_exception_ptr( \ std::runtime_error(paddle::string::Sprintf(__VA_ARGS__))), \ __FILE__, __LINE__); \ - } while (0) + } while (false) #define PADDLE_ENFORCE(...) \ do { \ @@ -195,7 +195,7 @@ inline void throw_on_error(T e) { throw ::paddle::platform::EnforceNotMet(std::current_exception(), \ __FILE__, __LINE__); \ } \ - } while (0) + } while (false) /* * Some enforce helpers here, usage: From 7163dd0413d5b99261ff95e0fab28a09f8abb74a Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Sun, 1 Oct 2017 09:27:44 -0700 Subject: [PATCH 02/15] revert code --- paddle/operators/recurrent_op.cc | 41 +++++++++++++++++++ paddle/operators/recurrent_op.h | 19 +++++++++ .../v2/framework/tests/test_recurrent_op.py | 3 -- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 80de229c33..b9fba3e135 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -28,6 +28,29 @@ using Variable = framework::Variable; using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; +void RecurrentAlgorithm::InferShape(const Scope& scope) const { + auto* input0 = scope.FindVar(arg_->inlinks[0]); + PADDLE_ENFORCE_NOT_NULL(input0); + seq_len_ = input0->GetMutable()->dims()[0]; + PADDLE_ENFORCE_GT(seq_len_, 0); + + CreateScopes(scope); + auto& step_scopes = GetStepScopes(scope); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, + true /*infer_shape_mode*/); + InitMemories(step_scopes[0], true /*infer_shape_mode*/); + + for (size_t i = 0; i < seq_len_; i++) { + if (i > 0) { + 
rnn::LinkMemories(step_scopes, arg_->memories, i, -1, + true /*infer_shape_mode*/); + } + (*stepnet_)->InferShape(*step_scopes[i]); + } + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, + true /*infer_shape_mode*/); +} + void RecurrentAlgorithm::Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); @@ -179,6 +202,24 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( } } +void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { + seq_len_ = + scope.FindVar(arg_->inlinks[0])->GetMutable()->dims()[0]; + auto step_scopes = GetStepScopes(scope); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, + true /*infer_shape_mode*/); + for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { + if (static_cast(step_id) != seq_len_ - 1) { + rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, + true /*infer_shape_mode*/); + } + (*stepnet_)->InferShape(*step_scopes[step_id]); + } + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, + true /*infer_shape_mode*/); + LinkBootMemoryGradients(step_scopes[0], true /*infer_shape_mode*/); +} + RecurrentGradientOp::RecurrentGradientOp( const std::string& type, const framework::VariableNameMap& inputs, const framework::VariableNameMap& outputs, diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index c6b9a5533e..18f8c53e18 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -41,6 +41,11 @@ class RecurrentAlgorithm { stepnet_ = stepnet; } + /** + * InferShape must be called before Run. + */ + void InferShape(const framework::Scope& scope) const; + protected: /* * The step scopes will be stored in the father scope as a variable. @@ -89,6 +94,11 @@ class RecurrentGradientAlgorithm { void LinkBootMemoryGradients(framework::Scope* step_scopes, bool infer_shape_mode) const; + /** + * InferShape must be called before Run. 
+ */ + void InferShape(const framework::Scope& scope) const; + protected: inline const std::vector& GetStepScopes( const framework::Scope& scope) const { @@ -123,8 +133,13 @@ class RecurrentOp : public framework::OperatorBase { void set_stepnet(std::unique_ptr net) { stepnet_ = std::move(net); } + const OperatorBase& stepnet() const { return *stepnet_; } + void InferShape(const framework::Scope& scope) const { + alg_.InferShape(scope); + } + static const rnn::ArgumentName kArgName; private: @@ -147,6 +162,10 @@ class RecurrentGradientOp : public framework::OperatorBase { PADDLE_THROW("Not Implemented"); } + void InferShape(const framework::Scope& scope) const { + alg_.InferShape(scope); + } + void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const override { alg_.Run(scope, dev_ctx); diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index 92161ae5dd..6b9e7a88ce 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -197,7 +197,4 @@ class RecurrentGradientOpTest(unittest.TestCase): if __name__ == '__main__': - exit( - 0 - ) # FIXME(yuyang18): InferShape has been removed, this unittest may error unittest.main() From 5423cb3e57949fc2885e39016422bf92b70b5260 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Sun, 1 Oct 2017 09:54:08 -0700 Subject: [PATCH 03/15] format --- paddle/framework/block_desc.h | 6 +++--- paddle/framework/op_info.h | 8 +++----- paddle/framework/program_desc.h | 6 +++--- paddle/framework/scope.h | 8 +++----- paddle/platform/macros.h | 10 ++++++---- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/paddle/framework/block_desc.h b/paddle/framework/block_desc.h index 1a1135bab4..59513ede33 100644 --- a/paddle/framework/block_desc.h +++ b/paddle/framework/block_desc.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include "paddle/framework/op_desc.h" #include "paddle/framework/var_desc.h" +#include "paddle/platform/macros.h" namespace paddle { namespace framework { @@ -34,9 +35,6 @@ class BlockDescBind { BlockDescBind(ProgramDescBind *prog, BlockDesc *desc) : prog_(prog), desc_(desc), need_update_(false) {} - BlockDescBind(const BlockDescBind &o) = delete; - BlockDescBind &operator=(const BlockDescBind &o) = delete; - int32_t ID() const { return desc_->idx(); } int32_t Parent() const { return desc_->parent_idx(); } @@ -66,6 +64,8 @@ class BlockDescBind { std::deque> ops_; std::unordered_map> vars_; + + DISABLE_COPY_AND_ASSIGN(BlockDescBind); }; } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h index 6d1ee4dece..5df3093318 100644 --- a/paddle/framework/op_info.h +++ b/paddle/framework/op_info.h @@ -19,6 +19,7 @@ #include #include "paddle/framework/attribute.h" #include "paddle/framework/op_desc.h" +#include "paddle/platform/macros.h" namespace paddle { namespace framework { @@ -72,11 +73,6 @@ class OpInfoMap { public: static OpInfoMap& Instance(); - OpInfoMap(const OpInfoMap& o) = delete; - OpInfoMap(OpInfoMap&& o) = delete; - OpInfoMap& operator=(const OpInfoMap& o) = delete; - OpInfoMap& operator=(OpInfoMap&& o) = delete; - bool Has(const std::string& op_type) const { return map_.find(op_type) != map_.end(); } @@ -112,6 +108,8 @@ class OpInfoMap { private: OpInfoMap() = default; std::unordered_map map_; + + DISABLE_COPY_AND_ASSIGN(OpInfoMap); }; } // namespace framework diff --git a/paddle/framework/program_desc.h b/paddle/framework/program_desc.h index 06ffcd4b15..9b34a06aef 100644 --- a/paddle/framework/program_desc.h +++ b/paddle/framework/program_desc.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include "paddle/framework/framework.pb.h" +#include "paddle/platform/macros.h" namespace paddle { namespace framework { @@ -26,9 +27,6 @@ class ProgramDescBind { public: static ProgramDescBind &Instance(ProgramDesc *prog); - ProgramDescBind(const ProgramDescBind &o) = delete; - ProgramDescBind &operator=(const ProgramDescBind &o) = delete; - BlockDescBind *AppendBlock(const BlockDescBind &parent); BlockDescBind *Block(size_t idx) { return blocks_[idx].get(); } @@ -46,6 +44,8 @@ class ProgramDescBind { ProgramDesc *prog_; std::vector> blocks_; + + DISABLE_COPY_AND_ASSIGN(ProgramDescBind); }; } // namespace framework } // namespace paddle diff --git a/paddle/framework/scope.h b/paddle/framework/scope.h index c93b03e481..7047f0d55e 100644 --- a/paddle/framework/scope.h +++ b/paddle/framework/scope.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include "paddle/framework/variable.h" +#include "paddle/platform/macros.h" namespace paddle { namespace framework { @@ -38,11 +39,6 @@ class Scope { Scope() {} ~Scope(); - // Disable Copy, Assign, Move. - Scope(const Scope& other) = delete; - Scope& operator=(const Scope& other) = delete; - Scope(Scope&& other) = delete; - /// Create a sub-scope. Returns a reference other than a pointer so /// to prevent from manual deletion. /// Mark it to const because that new kid scope cannot change parent scope. @@ -73,6 +69,8 @@ class Scope { std::unordered_map vars_; mutable std::list kids_; Scope const* parent_{nullptr}; + + DISABLE_COPY_AND_ASSIGN(Scope); }; } // namespace framework diff --git a/paddle/platform/macros.h b/paddle/platform/macros.h index 4a04a38c0c..feae7bdd77 100644 --- a/paddle/platform/macros.h +++ b/paddle/platform/macros.h @@ -16,8 +16,10 @@ limitations under the License. */ // Disable the copy and assignment operator for a class. 
#ifndef DISABLE_COPY_AND_ASSIGN -#define DISABLE_COPY_AND_ASSIGN(classname) \ - private: \ - classname(const classname&) = delete; \ - classname& operator=(const classname&) = delete +#define DISABLE_COPY_AND_ASSIGN(classname) \ + private: \ + classname(const classname&) = delete; \ + classname(const classname&&) = delete; \ + classname& operator=(const classname&) = delete; \ + classname& operator=(const classname&&) = delete #endif From 2296d81cf9560437b368354229b7ceb22b67d234 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 2 Oct 2017 11:39:10 -0700 Subject: [PATCH 04/15] Use `type_defs.h` to resolve cyclic dependencies --- paddle/framework/attribute.h | 10 +--------- paddle/framework/op_desc.h | 2 +- paddle/framework/op_info.h | 7 +------ paddle/framework/type_defs.h | 38 ++++++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 16 deletions(-) create mode 100644 paddle/framework/type_defs.h diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index c7559cefb6..d13530e340 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -21,20 +21,12 @@ limitations under the License. */ #include #include "paddle/framework/framework.pb.h" +#include "paddle/framework/type_defs.h" #include "paddle/platform/enforce.h" -#include "paddle/platform/variant.h" namespace paddle { namespace framework { -// The order should be as same as framework.proto -typedef boost::variant, - std::vector, std::vector, bool, - std::vector, BlockDesc*> - Attribute; - -typedef std::unordered_map AttributeMap; - ProgramDesc& GetProgramDesc(); template diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index e03b4d067f..0af4169715 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include #include #include "paddle/framework/attribute.h" -#include "paddle/framework/op_info.h" +#include "paddle/framework/type_defs.h" #include "paddle/framework/var_desc.h" namespace paddle { diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h index 6d1ee4dece..470336d367 100644 --- a/paddle/framework/op_info.h +++ b/paddle/framework/op_info.h @@ -19,15 +19,10 @@ #include #include "paddle/framework/attribute.h" #include "paddle/framework/op_desc.h" +#include "paddle/framework/type_defs.h" namespace paddle { namespace framework { -class OperatorBase; -using VariableNameMap = std::map>; - -using OpCreator = std::function; class GradOpDescMakerBase { public: diff --git a/paddle/framework/type_defs.h b/paddle/framework/type_defs.h new file mode 100644 index 0000000000..dec5066f1e --- /dev/null +++ b/paddle/framework/type_defs.h @@ -0,0 +1,38 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once +#include +#include +#include "paddle/platform/variant.h" + +namespace paddle { +namespace framework { +class OperatorBase; +using VariableNameMap = std::map>; + +// The order should be as same as framework.proto +using Attribute = + boost::variant, + std::vector, std::vector, bool, + std::vector, BlockDesc*>; + +using AttributeMap = std::unordered_map; + +using OpCreator = std::function; + +} // namespace framework +} // namespace paddle From 32f5c9dd934e7de15a93a8145bf6ee4499b3bc7d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 2 Oct 2017 11:51:24 -0700 Subject: [PATCH 05/15] recurrent_op pass the unit test --- paddle/operators/recurrent_op.cc | 87 +++++-------------- paddle/operators/recurrent_op.h | 23 +---- paddle/operators/rnn/recurrent_op_utils.cc | 55 +++++------- paddle/operators/rnn/recurrent_op_utils.h | 6 +- paddle/operators/sum_op.cc | 5 +- .../v2/framework/tests/test_recurrent_op.py | 26 +++--- 6 files changed, 66 insertions(+), 136 deletions(-) diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index b9fba3e135..016e2043fd 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -28,7 +28,8 @@ using Variable = framework::Variable; using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; -void RecurrentAlgorithm::InferShape(const Scope& scope) const { +void RecurrentAlgorithm::Run(const Scope& scope, + const platform::DeviceContext& dev_ctx) const { auto* input0 = scope.FindVar(arg_->inlinks[0]); PADDLE_ENFORCE_NOT_NULL(input0); seq_len_ = input0->GetMutable()->dims()[0]; @@ -36,38 +37,16 @@ void RecurrentAlgorithm::InferShape(const Scope& scope) const { CreateScopes(scope); auto& step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, - true /*infer_shape_mode*/); - InitMemories(step_scopes[0], true /*infer_shape_mode*/); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + 
InitMemories(step_scopes[0]); for (size_t i = 0; i < seq_len_; i++) { if (i > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, i, -1, - true /*infer_shape_mode*/); - } - (*stepnet_)->InferShape(*step_scopes[i]); - } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, - true /*infer_shape_mode*/); -} - -void RecurrentAlgorithm::Run(const Scope& scope, - const platform::DeviceContext& dev_ctx) const { - auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, - false /*infer_shape_mode*/); - InitMemories(step_scopes[0], false /*infer_shape_mode*/); - - for (size_t step_id = 0; step_id < seq_len_; step_id++) { - // create output alias variables - if (step_id > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, - false /*infer_shape_mode*/); + rnn::LinkMemories(step_scopes, arg_->memories, i, -1); } - (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); + (*stepnet_)->Run(*step_scopes[i], dev_ctx); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, - false /*infer_shape_mode*/); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); } void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { @@ -105,8 +84,7 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { } } -void RecurrentAlgorithm::InitMemories(Scope* step_scope, - bool infer_shape_mode) const { +void RecurrentAlgorithm::InitMemories(Scope* step_scope) const { for (auto& attr : arg_->memories) { auto* pre_mem = step_scope->NewVar(attr.pre_var)->GetMutable(); PADDLE_ENFORCE(step_scope->FindVar(attr.boot_var) != nullptr, @@ -114,12 +92,9 @@ void RecurrentAlgorithm::InitMemories(Scope* step_scope, attr.boot_var); auto* boot_mem = step_scope->FindVar(attr.boot_var)->GetMutable(); - if (infer_shape_mode) { - pre_mem->Resize(boot_mem->dims()); - PADDLE_ENFORCE_EQ(pre_mem->dims().size(), 2); - } else { - pre_mem->ShareDataWith(*boot_mem); - } + pre_mem->Resize(boot_mem->dims()); + 
PADDLE_ENFORCE_EQ(pre_mem->dims().size(), 2); + pre_mem->ShareDataWith(*boot_mem); } } @@ -169,23 +144,22 @@ class RecurrentAlgorithmProtoAndCheckerMaker void RecurrentGradientAlgorithm::Run( const Scope& scope, const platform::DeviceContext& dev_ctx) const { + seq_len_ = + scope.FindVar(arg_->inlinks[0])->GetMutable()->dims()[0]; auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, - false /*infer_shape_mode*/); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, - false /*infer_shape_mode*/); + rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); } (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); } - LinkBootMemoryGradients(step_scopes[0], false); - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, - false /*infer_shape_mode*/); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); + LinkBootMemoryGradients(step_scopes[0]); } void RecurrentGradientAlgorithm::LinkBootMemoryGradients( - Scope* step_scope, bool infer_shape_mode) const { + Scope* step_scope) const { for (auto& attr : arg_->memories) { PADDLE_ENFORCE(step_scope->FindVar(attr.var) != nullptr, "memory variable [%s] does not exists", attr.var); @@ -194,30 +168,9 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( auto* mem_grad = step_scope->NewVar(attr.var)->GetMutable(); auto* boot_mem_grad = step_scope->NewVar(attr.boot_var)->GetMutable(); - if (infer_shape_mode) { - boot_mem_grad->Resize(mem_grad->dims()); - } else { - boot_mem_grad->ShareDataWith(*mem_grad); - } - } -} - -void RecurrentGradientAlgorithm::InferShape(const Scope& scope) const { - seq_len_ = - scope.FindVar(arg_->inlinks[0])->GetMutable()->dims()[0]; - auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, - true /*infer_shape_mode*/); - for 
(int step_id = seq_len_ - 1; step_id >= 0; --step_id) { - if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, - true /*infer_shape_mode*/); - } - (*stepnet_)->InferShape(*step_scopes[step_id]); + boot_mem_grad->Resize(mem_grad->dims()); + boot_mem_grad->ShareDataWith(*mem_grad); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, - true /*infer_shape_mode*/); - LinkBootMemoryGradients(step_scopes[0], true /*infer_shape_mode*/); } RecurrentGradientOp::RecurrentGradientOp( diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index 18f8c53e18..752025e42c 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -41,11 +41,6 @@ class RecurrentAlgorithm { stepnet_ = stepnet; } - /** - * InferShape must be called before Run. - */ - void InferShape(const framework::Scope& scope) const; - protected: /* * The step scopes will be stored in the father scope as a variable. @@ -61,7 +56,7 @@ class RecurrentAlgorithm { ->GetMutable>(); } - void InitMemories(framework::Scope* step_scopes, bool infer_shape_mode) const; + void InitMemories(framework::Scope* step_scopes) const; private: std::unique_ptr* stepnet_; @@ -91,13 +86,7 @@ class RecurrentGradientAlgorithm { void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const; - void LinkBootMemoryGradients(framework::Scope* step_scopes, - bool infer_shape_mode) const; - - /** - * InferShape must be called before Run. 
- */ - void InferShape(const framework::Scope& scope) const; + void LinkBootMemoryGradients(framework::Scope* step_scopes) const; protected: inline const std::vector& GetStepScopes( @@ -136,10 +125,6 @@ class RecurrentOp : public framework::OperatorBase { const OperatorBase& stepnet() const { return *stepnet_; } - void InferShape(const framework::Scope& scope) const { - alg_.InferShape(scope); - } - static const rnn::ArgumentName kArgName; private: @@ -162,10 +147,6 @@ class RecurrentGradientOp : public framework::OperatorBase { PADDLE_THROW("Not Implemented"); } - void InferShape(const framework::Scope& scope) const { - alg_.InferShape(scope); - } - void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const override { alg_.Run(scope, dev_ctx); diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index a767009d23..a02994f99d 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -25,7 +25,7 @@ using LoDTensor = framework::LoDTensor; void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, - const size_t seq_len, bool infer_shape_mode) { + const size_t seq_len) { PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); for (size_t i = 0; i < inlinks.size(); ++i) { // global inputs @@ -41,11 +41,9 @@ void SegmentInputs(const std::vector& step_scopes, for (size_t j = 0; j < seq_len; j++) { Tensor* step_input = step_scopes[j]->NewVar(inlinks[i])->GetMutable(); - if (!infer_shape_mode) { - // The input of operators of each step is Tensor here. - // Maybe need to modify Slice function. - *step_input = input->Slice(j, j + 1); - } + // The input of operators of each step is Tensor here. + // Maybe need to modify Slice function. 
+ *step_input = input->Slice(j, j + 1); step_input->Resize(step_dims); } } @@ -53,39 +51,35 @@ void SegmentInputs(const std::vector& step_scopes, void ConcatOutputs(const std::vector& step_scopes, const std::vector& outlinks, - const size_t seq_len, bool infer_shape_mode) { + const size_t seq_len) { for (size_t i = 0; i < outlinks.size(); i++) { auto output_var = step_scopes[0]->parent().FindVar(outlinks[i]); PADDLE_ENFORCE_NOT_NULL(output_var, "output link [%s] is not in scope.", outlinks[i]); LoDTensor* output = output_var->GetMutable(); - if (infer_shape_mode) { - auto step_scope_var = step_scopes[0]->FindVar(outlinks[i]); - PADDLE_ENFORCE_NOT_NULL(step_scope_var, "%s not in scope", outlinks[i]); - f::DDim step_dims = - step_scope_var->template GetMutable()->dims(); - std::vector dims_vec = vectorize(step_dims); - dims_vec.insert(dims_vec.begin(), seq_len); - output->Resize(f::make_ddim(dims_vec)); - } else { - output->mutable_data(platform::CPUPlace()); - for (size_t j = 0; j < seq_len; j++) { - LoDTensor* step_output = - step_scopes[j]->FindVar(outlinks[i])->GetMutable(); - // TODO(luotao02) data type and platform::DeviceContext() should set - // correctly - (output->Slice(j, j + 1)) - .CopyFrom(*step_output, platform::CPUPlace()); - } + auto step_scope_var = step_scopes[0]->FindVar(outlinks[i]); + PADDLE_ENFORCE_NOT_NULL(step_scope_var, "%s not in scope", outlinks[i]); + f::DDim step_dims = + step_scope_var->template GetMutable()->dims(); + std::vector dims_vec = vectorize(step_dims); + dims_vec.insert(dims_vec.begin(), seq_len); + output->Resize(f::make_ddim(dims_vec)); + output->mutable_data(platform::CPUPlace()); + for (size_t j = 0; j < seq_len; j++) { + LoDTensor* step_output = + step_scopes[j]->FindVar(outlinks[i])->GetMutable(); + // TODO(luotao02) data type and platform::DeviceContext() should set + // correctly + (output->Slice(j, j + 1)) + .CopyFrom(*step_output, platform::CPUPlace()); } } } void LinkMemories(const std::vector& scopes, const 
std::vector& memories, - const size_t step_id, const int offset, - bool infer_shape_mode) { + const size_t step_id, const int offset) { PADDLE_ENFORCE_LT(step_id, scopes.size(), "step [%d] is out of range of step scopes' size [%d]", step_id, scopes.size()); @@ -100,11 +94,8 @@ void LinkMemories(const std::vector& scopes, for (auto& attr : memories) { auto mem = scope->FindVar(attr.pre_var)->GetMutable(); auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); - if (infer_shape_mode) { - mem->Resize(linked_mem->dims()); - } else { - mem->ShareDataWith(*linked_mem); - } + mem->Resize(linked_mem->dims()); + mem->ShareDataWith(*linked_mem); } } diff --git a/paddle/operators/rnn/recurrent_op_utils.h b/paddle/operators/rnn/recurrent_op_utils.h index 9c777f1e90..fd17b9b889 100644 --- a/paddle/operators/rnn/recurrent_op_utils.h +++ b/paddle/operators/rnn/recurrent_op_utils.h @@ -64,18 +64,18 @@ struct ArgumentName { */ void SegmentInputs(const std::vector& step_scopes, const std::vector& inlinks, - const size_t seq_len, bool infer_shape_mode); + const size_t seq_len); /** * Process outputs of step nets and merge to variables. 
*/ void ConcatOutputs(const std::vector& step_scopes, const std::vector& outlinks, - const size_t seq_len, bool infer_shape_mode); + const size_t seq_len); void LinkMemories(const std::vector& step_scopes, const std::vector& memories, const size_t step_id, - const int offset, bool infer_shape_mode); + const int offset); void InitArgument(const ArgumentName& name, Argument* arg, const framework::OperatorBase& op, bool is_grad = false); diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 5d76313aeb..c54843faa6 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -22,14 +22,15 @@ class SumOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContextBase* ctx) const override { + PADDLE_ENFORCE(ctx->HasInputs("X"), "Inputs(X) should not be null"); auto x_dims = ctx->GetInputsDim("X"); - PADDLE_ENFORCE(!x_dims.empty(), "Input(X) of SumOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of SumOp should not be null."); - auto in_dim = x_dims[0]; size_t N = x_dims.size(); PADDLE_ENFORCE_GT(N, 1, "Input tensors count should > 1."); + + auto in_dim = x_dims[0]; for (size_t i = 1; i < N; i++) { auto dim = x_dims[i]; PADDLE_ENFORCE(in_dim == dim, "Input tensors must have same shape"); diff --git a/python/paddle/v2/framework/tests/test_recurrent_op.py b/python/paddle/v2/framework/tests/test_recurrent_op.py index 6b9e7a88ce..1f114432c0 100644 --- a/python/paddle/v2/framework/tests/test_recurrent_op.py +++ b/python/paddle/v2/framework/tests/test_recurrent_op.py @@ -16,14 +16,17 @@ class PySimpleRNN(object): ''' def __init__(self, input_dim=30, batch_size=50, weight_dim=15, sent_len=11): - self.x = np.random.normal(size=(sent_len, batch_size, input_dim)) - self.W = np.random.normal(size=(input_dim, input_dim)) - self.U = np.random.normal(size=(input_dim, input_dim)) - self.h_boot = np.random.normal(size=(batch_size, input_dim)) + self.x = np.random.normal(size=(sent_len, 
batch_size, + input_dim)).astype("float32") + self.W = np.random.normal(size=(input_dim, input_dim)).astype("float32") + self.U = np.random.normal(size=(input_dim, input_dim)).astype("float32") + self.h_boot = np.random.normal(size=(batch_size, + input_dim)).astype("float32") # memories self.mems = [ - np.zeros(shape=(batch_size, input_dim)) for i in range(sent_len) + np.zeros(shape=(batch_size, input_dim)).astype("float32") + for i in range(sent_len) ] def forward(self): @@ -36,7 +39,7 @@ class PySimpleRNN(object): return [self.x[i] for i in range(self.x.shape[0])] def concat_outputs(self): - return np.array(self.mems) + return np.array(self.mems).astype("float32") def step(self, step_id, x): ''' @@ -47,8 +50,8 @@ class PySimpleRNN(object): pre_mem = self.mems[step_id - 1] else: pre_mem = self.h_boot - xW = np.matmul(x, self.W) - hU = np.matmul(pre_mem, self.U) + xW = np.matmul(x, self.W).astype("float32") + hU = np.matmul(pre_mem, self.U).astype("float32") sum = xW + hU self.mems[step_id] = py_sigmoid(sum) @@ -102,7 +105,8 @@ class RecurrentOpTest(unittest.TestCase): self.create_step_net() ctx = core.DeviceContext.create(core.CPUPlace()) self.rnnop.run(self.scope, ctx) - return np.array(self.scope.find_var("h@mem").get_tensor()) + return np.array(self.scope.find_var("h@mem").get_tensor()).astype( + "float32") def create_global_variables(self): # create inlink @@ -142,7 +146,7 @@ class RecurrentOpTest(unittest.TestCase): stepnet = core.Net.create() x_fc_op = Operator("mul", X="x", Y="W", Out="Wx") h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = Operator("add", X="Wx", Y="Uh", Out="sum") + sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") sig_op = Operator("sigmoid", X="sum", Y="h@mem") for op in [x_fc_op, h_fc_op, sum_op, sig_op]: @@ -179,7 +183,7 @@ class RecurrentGradientOpTest(unittest.TestCase): stepnet = core.Net.create() x_fc_op = Operator("mul", X="x@alias", Y="W", Out="Wx") h_fc_op = Operator("mul", X="h@pre", Y="U", Out="Uh") - sum_op = 
Operator("add", X="Wx", Y="Uh", Out="sum") + sum_op = Operator("sum", X=["Wx", "Uh"], Out="sum") sig_op = Operator("sigmoid", X="sum", Y="h@alias") for op in [x_fc_op, h_fc_op, sum_op, sig_op]: From c705f065ba403606d39bc972d85f7eba1920f029 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Mon, 2 Oct 2017 16:14:48 -0400 Subject: [PATCH 06/15] add TensorArray (#4459) * add tensor array * update * set type --- paddle/framework/CMakeLists.txt | 3 + paddle/framework/tensor_array.cc | 283 ++++++++++++++++++++++++++ paddle/framework/tensor_array.h | 118 +++++++++++ paddle/framework/tensor_array_test.cc | 130 ++++++++++++ 4 files changed, 534 insertions(+) create mode 100644 paddle/framework/tensor_array.cc create mode 100644 paddle/framework/tensor_array.h create mode 100644 paddle/framework/tensor_array_test.cc diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 9140854a96..5d394132b7 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -43,3 +43,6 @@ add_custom_command(TARGET framework_py_proto POST_BUILD cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) + +cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor) +cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place) diff --git a/paddle/framework/tensor_array.cc b/paddle/framework/tensor_array.cc new file mode 100644 index 0000000000..d54714c66c --- /dev/null +++ b/paddle/framework/tensor_array.cc @@ -0,0 +1,283 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + + + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/tensor_array.h" + +#include +#include +#include + +namespace paddle { +namespace framework { + +namespace detail { + +/* + * Offer an iterator over the length-sorted lod-tensor's top level. The top + * level of a lod-tensor stores a batch of sequences; each top-level sequence + * may contain several lower-level sequences. The top-level lod is sorted by the + * number of lower-level sequences in descending order, so that while the RNN + * runs the batch size keeps decreasing and the short sentences end at the tail + * of each batch. + * + * Let's take a simple lod-tensor for example + * + * |(0) |(1) top-level has two instances + * ||| ||||| lower-level + * + * sort by lower-level's length + * + * |(1) |(0) + * ||||| ||| + * + * when the RNN runs, it gets 5 batches (equal to the number of elements the + * longest sequence has) + * + * ||||| + * ||| + * + * the first three batches have two elements each, the last two batches have + * just one element each.
+ */ +struct DynamicBatchUnpacker { + using value_type = float; + + DynamicBatchUnpacker(const LoDTensor& source, size_t level, + bool descend = true) + : source(&source), level(level) { + BuildLengthSortedMeta(descend); + } + + LoDTensor GetBatch(size_t index); + + std::vector meta; + + LoDTensor const* source; + size_t level; + + protected: + void BuildLengthSortedMeta(bool descend); +}; + +LoDTensor PackDynamicBatch(const std::vector& source, + const std::vector& meta, const LoD& lod, + size_t level); + +} // namespace detail + +const LoDTensor& TensorArray::Read(size_t index) const { + PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); + if (index >= size()) { + values_.resize(index + 1); + } + return values_[index]; +} + +void TensorArray::Write(size_t index, const LoDTensor& value) { + PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); + + if (index >= size()) { + values_.resize(index + 1); + } + + values_[index].Resize(value.dims()); + values_[index].mutable_data(platform::CPUPlace()); + values_[index].CopyFrom(value, platform::CPUPlace()); +} + +void TensorArray::WriteShared(size_t index, const LoDTensor& value) { + PADDLE_ENFORCE_LE(index, MAX_SIZE, "index[%d] too large", index); + if (index >= size()) { + values_.resize(index + 1); + } + + values_[index].ShareDataWith(value); +} + +LoDTensor TensorArray::Pack(size_t level, const std::vector& meta, + const LoD& lod) const { + return detail::PackDynamicBatch(values_, meta, lod, level); +} + +std::vector TensorArray::Unpack(const LoDTensor& source, int level, + bool length_desend) { + detail::DynamicBatchUnpacker unpacker(source, level, + length_desend /*descend*/); + + // find max length of all the sequences + size_t max_length = 0; + for (const auto& seq : unpacker.meta) { + max_length = std::max(max_length, seq.end - seq.begin); + } + + // write batches to values + for (size_t batch_id = 0; batch_id < max_length; batch_id++) { + Write(batch_id, unpacker.GetBatch(batch_id)); + 
} + + return unpacker.meta; +} + +LoDTensor TensorArray::Stack() const { + LoDTensor result; + if (size() == 0) return result; + + const auto& first_dims = values_.front().dims(); + // check all the values have the same shape + // TODO(superjom) check the same dtypes + for (size_t idx = 1; idx < size(); idx++) { + const auto& value_dims = values_[idx].dims(); + PADDLE_ENFORCE_EQ(first_dims, value_dims); + } + + // copy + auto result_dims = vectorize(first_dims); + result_dims.insert(result_dims.begin(), size()); + result.Resize(make_ddim(result_dims)); + result.mutable_data(platform::CPUPlace()); + + for (size_t idx = 0; idx < size(); idx++) { + result.Slice(idx, idx + 1) + .CopyFrom(Read(idx), platform::CPUPlace()); + } + return result; +} + +void TensorArray::Unstack(const LoDTensor& source) const { + Unstack(source, false /*data_shared*/); +} + +void TensorArray::UnstackShared(const LoDTensor& source) const { + Unstack(source, true /*data_shared*/); +} + +void TensorArray::Unstack(const LoDTensor& source, bool data_shared) const { + size_t first_dim = source.dims()[0]; + DDim value_dims = slice_ddim(source.dims(), 1, source.dims().size()); + PADDLE_ENFORCE_GT(first_dim, 0, + "source should have some data to be unstacked"); + + values_.resize(first_dim); + + for (size_t elem = 0; elem < first_dim; elem++) { + // create a new value + auto& value = values_[elem]; + if (data_shared) { + // share memory + value.ShareDataWith(source.Slice(elem, elem + 1)); + } else { + // copy + value.Resize(value_dims); + value.CopyFrom(source.Slice(elem, elem + 1), + platform::CPUPlace()); + } + } +} + +size_t TensorArray::size() const { return values_.size(); } + +namespace detail { + +void DynamicBatchUnpacker::BuildLengthSortedMeta(bool descend) { + PADDLE_ENFORCE(meta.empty(), "duplicate build meta"); + // collect meta for each sequence in some level + auto lod = SliceLevels(source->lod(), level, level + 1)[0]; + + for (size_t seq_id = 0; seq_id < lod.size() - 1; seq_id++) { + 
DySeqMeta seq_meta({lod[seq_id], lod[seq_id + 1], seq_id}); + meta.push_back(seq_meta); + } + + PADDLE_ENFORCE_GT(meta.size(), 0, "meta is empty"); + + // sort by length; both orders compare strictly (a strict weak ordering) + sort(meta.begin(), meta.end(), + [descend](const DySeqMeta& a, const DySeqMeta& b) { + const size_t a_len = a.end - a.begin, b_len = b.end - b.begin; + return descend ? a_len > b_len : a_len < b_len; + }); +} + +LoDTensor DynamicBatchUnpacker::GetBatch(size_t index) { + PADDLE_ENFORCE(!meta.empty(), "should build meta first"); + LoDTensor result; + + // collect rows to copy: element `index` of each long-enough sequence + std::vector indice; + for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) { + const auto& seq_meta = meta[seq_id]; + if (seq_meta.begin + index >= seq_meta.end) break; // assumes descending length order + indice.push_back(seq_meta.begin + index); + } + + PADDLE_ENFORCE(!indice.empty(), "invalid batch at %d", index); + + // copy every collected record of the source LoDTensor into the result + auto record_dims = slice_ddim(source->dims(), 1, source->dims().size()); + auto record_dims_vec = vectorize(record_dims); + record_dims_vec.insert(record_dims_vec.begin(), indice.size()); + result.Resize(make_ddim(record_dims_vec)); + result.mutable_data(platform::CPUPlace()); + + for (size_t i = 0; i < indice.size(); i++) { + auto index = indice[i]; + auto target = result.Slice(i, i + 1); + auto source_ = source->Slice(index, index + 1); + target.CopyFrom(source_, platform::CPUPlace()); + } + + return result; +} + +LoDTensor PackDynamicBatch(const std::vector& source, + const std::vector& meta, const LoD& lod, + size_t level) { + PADDLE_ENFORCE(!source.empty()); + PADDLE_ENFORCE(!meta.empty()); + PADDLE_ENFORCE(!lod.empty()); + + LoDTensor result; + + // init result space + auto record_dims = slice_ddim(source[0].dims(), 1, source[0].dims().size()); + auto record_dims_vec = vectorize(record_dims); + auto height = lod[level].back(); + record_dims_vec.insert(record_dims_vec.begin(), height); + result.Resize(make_ddim(record_dims_vec)); + result.mutable_data(platform::CPUPlace()); + + for (size_t batch_id = 0;
batch_id < source.size(); batch_id++) { + for (size_t seq_id = 0; seq_id < meta.size(); seq_id++) { + const auto& seq_meta = meta[seq_id]; + // source is source[batch_id][seq_id] + // target is result[index] + auto index = seq_meta.begin + batch_id; + if (index >= seq_meta.end) break; + auto source_ = source[batch_id].Slice(seq_id, seq_id + 1); + auto target = result.Slice(index, index + 1); + target.CopyFrom(source_, platform::CPUPlace()); + } + } + + result.set_lod(lod); + + return result; +} + +} // namespace detail + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor_array.h b/paddle/framework/tensor_array.h new file mode 100644 index 0000000000..e76f33d2c0 --- /dev/null +++ b/paddle/framework/tensor_array.h @@ -0,0 +1,118 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include + +#include "paddle/framework/lod_tensor.h" + +namespace paddle { +namespace framework { + +/* + * DyBatchSeqPosition stores indices of the basic element in tensor. It is used + * after lod-tensor's re-assembling, its info can be used to recover the order + * in original lod-tensor. + */ +struct DySeqMeta { + size_t begin; + size_t end; // not included + size_t ori_idx; +}; + +/* + * TensorArray is a C-array-like array of tensors, it is meant to be used with + * dynamic iteration primitives such as while_loop. It is used to segment inputs + * and store states in all time steps. 
+ * + * By providing some methods similar to a C++ array, the definition of some + * state-based dynamic models such as RNN could be more natural and highly + * flexible. + */ +class TensorArray { + public: + using value_type = float; + + // max number of values allowed to store. + const size_t MAX_SIZE{100000}; + + /* + * Inputs: + * - value_shared: share memory between tensors. + */ + explicit TensorArray(bool values_shared = true) + : values_shared_(values_shared) {} + + /* + * Read the value at location `index` in the `TensorArray`. + */ + const LoDTensor &Read(size_t index) const; + + /* + * Write value into the index of the TensorArray. + */ + void Write(size_t index, const LoDTensor &value); + + /* + * Write value into the index of the TensorArray, with memory shared. + */ + void WriteShared(size_t index, const LoDTensor &value); + + /* + * Recover the original LoD-arranged LoDTensor with the `values`, `level`, + * `meta` and `lod`. + */ + LoDTensor Pack(size_t level, const std::vector &meta, + const LoD &lod) const; + + /* + * Split LoDTensor in some `level` and write the generated batches to + * `values`; if `length_desend` is set, sort by length in descending order, + * else in ascending order. + */ + std::vector Unpack(const LoDTensor &source, int level, + bool length_desend); + + /* + * Pack the values into a tensor with rank one higher than each tensor in + * values. + */ + LoDTensor Stack() const; + + /* + * Unpacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors. + */ + void Unstack(const LoDTensor &source) const; + + /* + * Unpacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors, + * with memory of tensors shared. + */ + void UnstackShared(const LoDTensor &source) const; + + /* + * Return the number of values.
+ */ + size_t size() const; + + protected: + void Unstack(const LoDTensor &source, bool data_shared) const; + + private: + mutable std::vector values_; + bool values_shared_; +}; // class TensorArray + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor_array_test.cc b/paddle/framework/tensor_array_test.cc new file mode 100644 index 0000000000..d9f52509cd --- /dev/null +++ b/paddle/framework/tensor_array_test.cc @@ -0,0 +1,130 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#include "paddle/framework/tensor_array.h" + +#include + +namespace paddle { +namespace framework { + +class TensorArrayTester : public ::testing::Test { + protected: + void SetUp() override { + LoDTensor source; + source.Resize(make_ddim({batch_size, dim})); + int* data = source.mutable_data(platform::CPUPlace()); + for (int i = 0; i < 16 * 32; i++) { + data[i] = i; + } + ta.Unstack(source); + } + + TensorArray ta; + const int batch_size = 16; + const int dim = 32; +}; + +TEST_F(TensorArrayTester, Read) { + for (int i = 0; i < batch_size; i++) { + const auto& tensor = ta.Read(i); + ASSERT_EQ(tensor.dims()[0], 1); + ASSERT_EQ(tensor.dims()[1], dim); + } +} + +TEST_F(TensorArrayTester, Write) { + LoDTensor source; + source.Resize(make_ddim({1, dim})); + for (int i = 0; i < dim; i++) { + *(source.mutable_data(platform::CPUPlace()) + i) = i; + } + + ta.Write(2, source); + + const auto& tensor = ta.Read(2); + for (int i = 0; i < dim; i++) { + EXPECT_EQ(*(tensor.data() + i), *(source.data() + i)); + } +} + +TEST_F(TensorArrayTester, WriteShared) { + LoDTensor source; + source.Resize(make_ddim({1, dim})); + for (int i = 0; i < dim; i++) { + *(source.mutable_data(platform::CPUPlace()) + i) = i; + } + + ta.WriteShared(2, source); + + const auto& tensor = ta.Read(2); + for (int i = 0; i < dim; i++) { + EXPECT_EQ(*(tensor.data() + i), *(source.data() + i)); + } + + EXPECT_EQ(source.data(), tensor.data()); +} + +class TensorArrayPackTester : public ::testing::Test { + protected: + virtual void SetUp() override { + lod.push_back(std::vector{0, 2, 9, 13}); + + source.set_lod(lod); + source.Resize(make_ddim({13, 128})); + source.mutable_data(platform::CPUPlace()); + + // content of each setence: 0 1 2 3 4 + const auto& level = lod.front(); + for (size_t i = 0; i < level.size() - 1; i++) { + size_t begin = level[i]; + size_t end = level[i + 1]; + for (size_t j = begin; j < end; j++) { + auto record = source.Slice(j, j + 1); + for (int dim = 0; dim < 128; dim++) { + 
record.mutable_data(platform::CPUPlace())[dim] = j - begin; + } + } + } + + // unpack + meta = ta.Unpack(source, 0, true); + } + + LoD lod; + TensorArray ta; + LoDTensor source; + std::vector meta; +}; + +TEST_F(TensorArrayPackTester, Unpack) { + ASSERT_EQ(ta.size(), 7UL); + + const auto& t0 = ta.Read(0); + const auto& t1 = ta.Read(1); + + ASSERT_EQ(t0.data()[0], int(0)); + ASSERT_EQ(t1.data()[0], int(1)); +} + +TEST_F(TensorArrayPackTester, Pack) { + LoDTensor packed = ta.Pack(0, meta, lod); +} + +TEST_F(TensorArrayTester, size) { + ASSERT_EQ(ta.size(), static_cast(batch_size)); +} + +} // namespace framework +} // namespace paddle From 37bbaabdf1abfe14e19cf7dcb7a842a10b36d1c8 Mon Sep 17 00:00:00 2001 From: dongzhihong Date: Mon, 2 Oct 2017 14:17:17 -0700 Subject: [PATCH 07/15] "fix conflict" --- paddle/framework/op_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h index 7940922b09..9672e540c8 100644 --- a/paddle/framework/op_info.h +++ b/paddle/framework/op_info.h @@ -20,7 +20,7 @@ #include "paddle/framework/attribute.h" #include "paddle/framework/op_desc.h" #include "paddle/framework/type_defs.h" - +#include "paddle/platform/macros.h" namespace paddle { namespace framework { From 6b051b651ae72305d9877fd3cd094028c21bdddb Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 2 Oct 2017 14:24:03 -0700 Subject: [PATCH 08/15] optimize code --- paddle/operators/recurrent_op.cc | 38 ++++++++++++---------- paddle/operators/recurrent_op.h | 4 +-- paddle/operators/rnn/recurrent_op_utils.cc | 8 ++--- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index 016e2043fd..bcd6a3410a 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -32,24 +32,25 @@ void RecurrentAlgorithm::Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const { auto* input0 = 
scope.FindVar(arg_->inlinks[0]); PADDLE_ENFORCE_NOT_NULL(input0); - seq_len_ = input0->GetMutable()->dims()[0]; - PADDLE_ENFORCE_GT(seq_len_, 0); + size_t seq_len = input0->GetMutable()->dims()[0]; + PADDLE_ENFORCE_GT(seq_len, 0); - CreateScopes(scope); + CreateScopes(scope, seq_len); auto& step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len); InitMemories(step_scopes[0]); - for (size_t i = 0; i < seq_len_; i++) { - if (i > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, i, -1); + for (size_t step_id = 0; step_id < seq_len; step_id++) { + if (step_id > 0) { + rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1); } - (*stepnet_)->Run(*step_scopes[i], dev_ctx); + (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len); } -void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { +void RecurrentAlgorithm::CreateScopes(const Scope& scope, + size_t seq_len) const { // TODO(superjom) Only two scopes are needed for inference, this case will be // supported later. 
auto step_scopes_var = scope.FindVar(arg_->step_scopes); @@ -60,8 +61,8 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope) const { PADDLE_ENFORCE_NOT_NULL(stepnet_); PADDLE_ENFORCE(!(*stepnet_)->Outputs().empty(), "stepnet_ op has no outputs"); - if (seq_len_ > step_scopes->size()) { - for (size_t i = step_scopes->size(); i < seq_len_; ++i) { + if (seq_len > step_scopes->size()) { + for (size_t i = step_scopes->size(); i < seq_len; ++i) { auto& step_scope = scope.NewScope(); // create step net's temp inputs @@ -144,17 +145,18 @@ class RecurrentAlgorithmProtoAndCheckerMaker void RecurrentGradientAlgorithm::Run( const Scope& scope, const platform::DeviceContext& dev_ctx) const { - seq_len_ = - scope.FindVar(arg_->inlinks[0])->GetMutable()->dims()[0]; + auto* input0 = scope.FindVar(arg_->inlinks[0]); + PADDLE_ENFORCE_NOT_NULL(input0); + size_t seq_len = input0->GetMutable()->dims()[0]; auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); - for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { - if (static_cast(step_id) != seq_len_ - 1) { + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len); + for (int step_id = seq_len - 1; step_id >= 0; --step_id) { + if (step_id != seq_len - 1) { rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); } (*stepnet_)->Run(*step_scopes[step_id], dev_ctx); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len); LinkBootMemoryGradients(step_scopes[0]); } diff --git a/paddle/operators/recurrent_op.h b/paddle/operators/recurrent_op.h index 752025e42c..253d7e3284 100644 --- a/paddle/operators/recurrent_op.h +++ b/paddle/operators/recurrent_op.h @@ -48,7 +48,7 @@ class RecurrentAlgorithm { * NOTE the scopes are reused in both the forward and backward, so just * create once and expand its size if more steps need. 
*/ - void CreateScopes(const framework::Scope& scope) const; + void CreateScopes(const framework::Scope& scope, size_t seq_len) const; const std::vector& GetStepScopes( const framework::Scope& scope) const { @@ -61,7 +61,6 @@ class RecurrentAlgorithm { private: std::unique_ptr* stepnet_; rnn::Argument* arg_; - mutable size_t seq_len_; }; class RecurrentGradientAlgorithm { @@ -97,7 +96,6 @@ class RecurrentGradientAlgorithm { private: rnn::Argument* arg_; - mutable size_t seq_len_; std::unique_ptr* stepnet_; }; diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index a02994f99d..a37d21d480 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -53,12 +53,12 @@ void ConcatOutputs(const std::vector& step_scopes, const std::vector& outlinks, const size_t seq_len) { for (size_t i = 0; i < outlinks.size(); i++) { - auto output_var = step_scopes[0]->parent().FindVar(outlinks[i]); + auto* output_var = step_scopes[0]->parent().FindVar(outlinks[i]); PADDLE_ENFORCE_NOT_NULL(output_var, "output link [%s] is not in scope.", outlinks[i]); LoDTensor* output = output_var->GetMutable(); - auto step_scope_var = step_scopes[0]->FindVar(outlinks[i]); + auto* step_scope_var = step_scopes[0]->FindVar(outlinks[i]); PADDLE_ENFORCE_NOT_NULL(step_scope_var, "%s not in scope", outlinks[i]); f::DDim step_dims = step_scope_var->template GetMutable()->dims(); @@ -89,8 +89,8 @@ void LinkMemories(const std::vector& scopes, step_id + offset, scopes.size(), "offset [%d] is out of range, it must be less than (%d - %d)", offset, scopes.size(), step_id); - auto scope = scopes[step_id]; - auto linked_scope = scopes[step_id + offset]; + auto* scope = scopes[step_id]; + auto* linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { auto mem = scope->FindVar(attr.pre_var)->GetMutable(); auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); From 
cde542e6524d8fd084983e20e0051a3caf22f6b1 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Mon, 2 Oct 2017 14:51:15 -0700 Subject: [PATCH 09/15] optimize auto --- paddle/operators/recurrent_op.cc | 6 +++--- paddle/operators/rnn/recurrent_op_utils.cc | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index bcd6a3410a..04c4c24951 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -53,9 +53,9 @@ void RecurrentAlgorithm::CreateScopes(const Scope& scope, size_t seq_len) const { // TODO(superjom) Only two scopes are needed for inference, this case will be // supported later. - auto step_scopes_var = scope.FindVar(arg_->step_scopes); + auto* step_scopes_var = scope.FindVar(arg_->step_scopes); PADDLE_ENFORCE(step_scopes_var != nullptr, ""); - auto step_scopes = step_scopes_var->GetMutable>(); + auto* step_scopes = step_scopes_var->GetMutable>(); // Now all variables in scope must be created outside of op. 
PADDLE_ENFORCE_NOT_NULL(stepnet_); @@ -148,7 +148,7 @@ void RecurrentGradientAlgorithm::Run( auto* input0 = scope.FindVar(arg_->inlinks[0]); PADDLE_ENFORCE_NOT_NULL(input0); size_t seq_len = input0->GetMutable()->dims()[0]; - auto step_scopes = GetStepScopes(scope); + auto& step_scopes = GetStepScopes(scope); rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len); for (int step_id = seq_len - 1; step_id >= 0; --step_id) { if (step_id != seq_len - 1) { diff --git a/paddle/operators/rnn/recurrent_op_utils.cc b/paddle/operators/rnn/recurrent_op_utils.cc index a37d21d480..ef317a71f1 100644 --- a/paddle/operators/rnn/recurrent_op_utils.cc +++ b/paddle/operators/rnn/recurrent_op_utils.cc @@ -92,8 +92,8 @@ void LinkMemories(const std::vector& scopes, auto* scope = scopes[step_id]; auto* linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { - auto mem = scope->FindVar(attr.pre_var)->GetMutable(); - auto linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); + auto* mem = scope->FindVar(attr.pre_var)->GetMutable(); + auto* linked_mem = linked_scope->FindVar(attr.var)->GetMutable(); mem->Resize(linked_mem->dims()); mem->ShareDataWith(*linked_mem); } @@ -106,11 +106,11 @@ void InitArgument(const ArgumentName& name, Argument* arg, arg->inlinks = op.Inputs(name.inlinks); arg->outlinks = op.Outputs(name.outlinks); - auto boot_memories = + auto& boot_memories = is_grad ? op.Outputs(name.boot_memories) : op.Inputs(name.boot_memories); // attributes - auto memories = op.Attr>(name.memories); - auto pre_memories = op.Attr>(name.pre_memories); + auto& memories = op.Attr>(name.memories); + auto& pre_memories = op.Attr>(name.pre_memories); PADDLE_ENFORCE(memories.size() == boot_memories.size(), "the size of memories, boot_memories don't match:%d,%d", From 9ff1fd41b2e8769d233e160975e036f539cda99f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 2 Oct 2017 15:57:18 -0700 Subject: [PATCH 10/15] Fix MacOS compile error The private data `tensor_shared_` is not used. 
--- paddle/framework/tensor_array.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/paddle/framework/tensor_array.h b/paddle/framework/tensor_array.h index e76f33d2c0..22ae6a966f 100644 --- a/paddle/framework/tensor_array.h +++ b/paddle/framework/tensor_array.h @@ -47,13 +47,6 @@ class TensorArray { // max number of values allowed to store. const size_t MAX_SIZE{100000}; - /* - * Inputs: - * - value_shared: share memory between tensors. - */ - explicit TensorArray(bool values_shared = true) - : values_shared_(values_shared) {} - /* * Read the value at location `index` in the `TensorArray`. */ @@ -111,7 +104,6 @@ class TensorArray { private: mutable std::vector values_; - bool values_shared_; }; // class TensorArray } // namespace framework From adec0d30fe8454f84b6bc61cc8b0385f6483d0c3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 2 Oct 2017 16:18:26 -0700 Subject: [PATCH 11/15] Simplify SumOp Kernel --- paddle/operators/CMakeLists.txt | 6 +++++- paddle/operators/sum_op.cc | 29 +++++++++++++++-------------- paddle/operators/sum_op.cu | 4 +--- paddle/operators/sum_op.h | 19 ------------------- 4 files changed, 21 insertions(+), 37 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 43eb4de2c1..0fa1fca2bc 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -103,12 +103,16 @@ set(DEPS_OPS recurrent_op cond_op cross_entropy_op - softmax_with_cross_entropy_op) + softmax_with_cross_entropy_op + sum_op) + + op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor net_op) op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax) +op_library(sum_op DEPS net_op) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 
c54843faa6..7c422b4770 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -11,6 +11,7 @@ limitations under the License. */ #include "paddle/operators/sum_op.h" #include +#include "paddle/operators/net_op.h" namespace paddle { namespace operators { @@ -57,21 +58,23 @@ or not. But the output only shares the LoD with the first input. } }; -class SumGradOp : public framework::OperatorWithKernel { +class SumGradOp : public NetOp { public: - using framework::OperatorWithKernel::OperatorWithKernel; + SumGradOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : NetOp(type, inputs, outputs, attrs) { + auto& x_grad_names = Outputs(framework::GradVarName("X")); + auto out_grad_name = this->Input(framework::GradVarName("Out")); - protected: - void InferShape(framework::InferShapeContextBase* ctx) const override { - auto out_grad_dims = ctx->GetInputDim(framework::GradVarName("Out")); - auto x_grad_names = ctx->Outputs(framework::GradVarName("X")); - size_t x_length = x_grad_names.size(); - std::vector x_grad_dims; - x_grad_dims.reserve(x_length); - for (size_t i = 0; i < x_length; ++i) { - x_grad_dims.push_back(out_grad_dims); + framework::AttributeMap grad_attrs; + grad_attrs["scale"] = 1.0f; + for (auto& x_grad_name : x_grad_names) { + AppendOp(framework::OpRegistry::CreateOp( + "scale", {{"X", {out_grad_name}}}, {{"Out", {x_grad_name}}}, + grad_attrs)); } - ctx->SetOutputsDim(framework::GradVarName("X"), x_grad_dims); + CompleteAddOp(false); } }; @@ -81,5 +84,3 @@ class SumGradOp : public framework::OperatorWithKernel { namespace ops = paddle::operators; REGISTER_OP(sum, ops::SumOp, ops::SumOpMaker, sum_grad, ops::SumGradOp); REGISTER_OP_CPU_KERNEL(sum, ops::SumKernel); -REGISTER_OP_CPU_KERNEL(sum_grad, - ops::SumGradKernel); diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu index a465cf3659..7129e6bf62 100644 --- 
a/paddle/operators/sum_op.cu +++ b/paddle/operators/sum_op.cu @@ -13,6 +13,4 @@ limitations under the License. */ #include "paddle/operators/sum_op.h" namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel); -REGISTER_OP_GPU_KERNEL(sum_grad, - ops::SumGradKernel); +REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel); \ No newline at end of file diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index 7e8fbb9e41..91e5da8b40 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -42,24 +42,5 @@ class SumKernel : public framework::OpKernel { } }; -template -class SumGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input(framework::GradVarName("Out")); - auto outs = context.MultiOutput(framework::GradVarName("X")); - for (auto out : outs) { - out->mutable_data(context.GetPlace()); - } - - auto place = context.GetEigenDevice(); - auto in = EigenVector::Flatten(*input); - for (auto out : outs) { - auto result = EigenVector::Flatten(*out); - result.device(place) = in; - } - } -}; - } // namespace operators } // namespace paddle From ff1bfdedc97eb0834745e812abd619581fde7950 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 2 Oct 2017 17:09:25 -0700 Subject: [PATCH 12/15] Fix CRLF in sum_op.cu --- paddle/operators/sum_op.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/operators/sum_op.cu b/paddle/operators/sum_op.cu index 7129e6bf62..b1896d3cd8 100644 --- a/paddle/operators/sum_op.cu +++ b/paddle/operators/sum_op.cu @@ -13,4 +13,4 @@ limitations under the License. 
*/ #include "paddle/operators/sum_op.h" namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(sum, ops::SumKernel); From b3e479da1c9cdb580e4577ebdafc5ec451ca4ed2 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 2 Oct 2017 18:38:49 -0700 Subject: [PATCH 13/15] Fix CI --- paddle/framework/grad_op_builder_test.cc | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index 55c5fa420e..2dbc2e6620 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -39,28 +39,6 @@ class IOIgnoredOpMaker : public OpProtoAndCheckerMaker { namespace f = paddle::framework; -TEST(GradOpBuilder, AddTwo) { - std::shared_ptr add_op(f::OpRegistry::CreateOp( - "sum", {{"X", {"x", "y"}}}, {{"Out", {"out"}}}, {})); - std::shared_ptr grad_add_op = - f::OpRegistry::CreateGradOp(*add_op); - - EXPECT_EQ(grad_add_op->Inputs().size(), 1UL); - EXPECT_EQ(grad_add_op->Outputs().size(), 1UL); - EXPECT_EQ(grad_add_op->Input(f::GradVarName("Out")), f::GradVarName("out")); - auto &outputs = grad_add_op->Outputs(f::GradVarName("X")); - EXPECT_EQ(2UL, outputs.size()); - auto in_output = [&outputs](const std::string &name) { - for (auto &output_name : outputs) { - if (output_name == name) return true; - } - return false; - }; - - EXPECT_TRUE(in_output(f::GradVarName("x"))); - EXPECT_TRUE(in_output(f::GradVarName("y"))); -} - REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker, mult_io_grad, f::NOP); REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker, io_ignored_grad, f::NOP); From 42e7fe05a23067677fe7cf552e9534e329886fbb Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Mon, 2 Oct 2017 20:08:06 -0700 Subject: [PATCH 14/15] Changing learning rate from attribute to input(float) (#4568) * Changing learning rate from attribute to input(float) * Removing obsolete code --- 
paddle/operators/sgd_op.cc | 4 +++- paddle/operators/sgd_op.h | 2 +- paddle/pybind/pybind.cc | 7 +++++++ python/paddle/v2/framework/tests/op_test.py | 17 +++++++++++------ python/paddle/v2/framework/tests/test_sgd_op.py | 3 +-- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 3bce95535c..8f9eae4186 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -27,6 +27,8 @@ class SGDOp : public framework::OperatorWithKernel { "Input(param) of SGDOp should not be null."); PADDLE_ENFORCE(ctx->HasInput("grad"), "Input(grad) of SGDOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("learning_rate"), + "Input(learning_rate) of SGDOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("param_out"), "Output(param_out) of SGDOp should not be null."); @@ -42,9 +44,9 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker { SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("param", "input parameter"); + AddInput("learning_rate", "learning rate of sgd"); AddInput("grad", "input gradient"); AddOutput("param_out", "output parameter"); - AddAttr("learning_rate", "learning rate of sgd"); AddComment(R"DOC( Simplest sgd algorithm. 
diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index a3fe330894..977d201ced 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -31,7 +31,7 @@ class SGDOpKernel : public framework::OpKernel { auto param = ctx.Input("param"); auto grad = ctx.Input("grad"); auto param_out = ctx.Output("param_out"); - float lr = ctx.Attr("learning_rate"); + float lr = *ctx.Input("learning_rate"); param_out->mutable_data(ctx.GetPlace()); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index f4121e9d71..d480427f59 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -143,6 +143,13 @@ All parameter, weight, gradient are variables in Paddle. .def("set_int", [](Variable &var, int val) -> void { *var.GetMutable() = val; }) .def("get_int", [](const Variable &var) -> int { return var.Get(); }) + .def("is_float", [](const Variable &var) { return var.IsType(); }) + .def("set_float", + [](Variable &var, float val) -> void { + *var.GetMutable() = val; + }) + .def("get_float", + [](const Variable &var) -> float { return var.Get(); }) .def("get_tensor", [](Variable &self) -> LoDTensor * { return self.GetMutable(); diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py index 75df2eeddf..81067f38bb 100644 --- a/python/paddle/v2/framework/tests/op_test.py +++ b/python/paddle/v2/framework/tests/op_test.py @@ -46,12 +46,17 @@ def create_op(scope, op_type, inputs, outputs, attrs): def set_input(scope, op, inputs, place): def __set_input__(var_name, var): - tensor = scope.find_var(var_name).get_tensor() - if isinstance(var, tuple): - tensor.set_lod(var[1]) - var = var[0] - tensor.set_dims(var.shape) - tensor.set(var, place) + if isinstance(var, tuple) or isinstance(var, np.ndarray): + tensor = scope.find_var(var_name).get_tensor() + if isinstance(var, tuple): + tensor.set_lod(var[1]) + var = var[0] + tensor.set_dims(var.shape) + tensor.set(var, place) + elif isinstance(var, float): + 
scope.find_var(var_name).set_float(var) + elif isinstance(var, int): + scope.find_var(var_name).set_int(var) for in_name, in_dup in Operator.get_op_inputs(op.type()): if in_name in inputs: diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py index 64e54d1500..f1125f4edb 100644 --- a/python/paddle/v2/framework/tests/test_sgd_op.py +++ b/python/paddle/v2/framework/tests/test_sgd_op.py @@ -10,8 +10,7 @@ class TestSGDOp(OpTest): g = np.random.random((102, 105)).astype("float32") lr = 0.1 - self.inputs = {'param': w, 'grad': g} - self.attrs = {'learning_rate': lr} + self.inputs = {'param': w, 'grad': g, 'learning_rate': lr} self.outputs = {'param_out': w - lr * g} def test_check_output(self): From b5dbe88b5ab504f88c6e7eaaa8b27d3965701478 Mon Sep 17 00:00:00 2001 From: qijun Date: Mon, 2 Oct 2017 20:26:17 -0700 Subject: [PATCH 15/15] follow comments --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/executor.cc | 159 +++--------------------------- paddle/framework/executor.h | 14 ++- paddle/framework/executor_test.cc | 12 ++- paddle/platform/CMakeLists.txt | 2 + paddle/platform/device.cc | 59 +++++++++++ paddle/platform/device.h | 45 +++++++++ 7 files changed, 139 insertions(+), 154 deletions(-) create mode 100644 paddle/platform/device.cc create mode 100644 paddle/platform/device.h diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 984fc62aa3..506d0f9833 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -44,5 +44,5 @@ add_custom_command(TARGET framework_py_proto POST_BUILD cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context) -cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto) +cc_library(executor SRCS executor.cc DEPS op_registry device scope framework_proto) cc_test(executor_test SRCS executor_test.cc DEPS 
executor) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index ebe3259bc0..57e177bb0a 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -15,162 +15,31 @@ limitations under the License. */ #include "paddle/framework/executor.h" #include #include "paddle/framework/op_registry.h" -#include "paddle/framework/operator.h" #include "paddle/framework/scope.h" -#include "paddle/platform/device_context.h" namespace paddle { namespace framework { -class LinearListView; -class GraphView; - -// Immutable view of a ProgramDesc organized for efficient execution. -class ProgramDescView { - public: - virtual ~ProgramDescView() {} - virtual void Initialize(const ProgramDesc*) = 0; - static ProgramDescView* Create(bool is_linear); -}; - -class LinearListView : public ProgramDescView { - public: - void Initialize(const ProgramDesc*) override; - - private: - std::vector> ops_; -}; - -class GraphView : public ProgramDescView { - public: - void Initialize(const ProgramDesc*) override; -}; - -ProgramDescView* ProgramDescView::Create(bool is_linear) { - if (is_linear) { - return new LinearListView(); - } else { - return new GraphView(); - } -} - -void LinearListView::Initialize(const ProgramDesc* pdesc) { - // get a LinearView of ProgramDesc - for (auto& block_desc : pdesc->blocks()) { - for (auto& op_desc : block_desc.ops()) { - ops_.emplace_back(OpRegistry::CreateOp(op_desc)); - } +Executor::Executor(const std::vector& places) { + devices_.resize(places.size()); + for (size_t i = 0; i < places.size(); i++) { + devices_[i] = platform::GetDevice(places[i]); } } -void GraphView::Initialize(const ProgramDesc* pdesc) { - // get a GraphView of ProgramDesc -} - -struct Device { - platform::CPUDeviceContext* cpu_device_context; -#ifndef PADDLE_ONLY_CPU - platform::CUDADeviceContext* cuda_device_context; -#endif - -#ifndef PADDLE_ONLY_CPU - Device(platform::CPUDeviceContext* cpu, platform::CUDADeviceContext* gpu) - : cpu_device_context(cpu), 
cuda_device_context(gpu) {} -#else - explicit Device(platform::CPUDeviceContext* cpu) : cpu_device_context(cpu) {} -#endif -}; - -class ExecutorImpl : public Executor { - public: - ExecutorImpl(Scope* scope, const Device* device, const ProgramDesc* pdesc, - bool is_linear) - : scope_(scope), - device_(device), - program_desc_(pdesc), - view_(ProgramDescView::Create(is_linear)) {} - - virtual ~ExecutorImpl() { - if (view_) delete view_; - } - - void Run() override; - - void Initialize(); - - private: - Scope* scope_; - const Device* device_; - const ProgramDesc* program_desc_; - ProgramDescView* view_; -}; - -template -std::unique_ptr make_unique(Args&&... args) { - return std::unique_ptr(new T(std::forward(args)...)); -} - -platform::CPUDeviceContext* GetCPUDeviceContext( - const platform::CPUPlace& place) { - static std::unique_ptr g_cpu_device_context = - make_unique(place); - return g_cpu_device_context.get(); -} - -#ifndef PADDLE_ONLY_CPU -platform::CUDADeviceContext* GetCUDADeviceContext( - const platform::GPUPlace& place) { - static std::unique_ptr g_cuda_device_context = - make_unique(place); - return g_cuda_device_context.get(); -} -#endif - -Device* GetDevice(const platform::Place& place) { - platform::CPUPlace cpu_place; -#ifndef PADDLE_ONLY_CPU - if (platform::is_gpu_place(place)) { - platform::GPUPlace gpu_place = boost::get(place); - static std::unique_ptr g_device = make_unique( - GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place)); - return g_device.get(); - } else { - static std::unique_ptr g_device = - make_unique(GetCPUDeviceContext(cpu_place), nullptr); - return g_device.get(); - } -#else - static std::unique_ptr g_device = - make_unique(GetCPUDeviceContext(cpu_place)); - return g_device.get(); -#endif -} - -framework::Scope* GetScope() { - static std::unique_ptr g_scope = - make_unique(); - return g_scope.get(); -} - -Executor* NewLocalExecutor(const platform::Place& place, - const ProgramDesc& pdesc, bool is_linear) { - return new 
ExecutorImpl(GetScope(), GetDevice(place), &pdesc, is_linear); -} - -void ExecutorImpl::Run() { +void Executor::Run(const ProgramDesc& pdesc, Scope* scope, + std::vector* outputs) { // operators running - scope_->NewVar(); - device_->cpu_device_context->Wait(); + Scope& local_scope = scope->NewScope(); + local_scope.NewVar(); + for (auto device : devices_) { + device->cpu_device_context->Wait(); #ifndef PADDLE_ONLY_CPU - if (device_->cuda_device_context) { - device_->cuda_device_context->Wait(); - } + if (device->cuda_device_context) { + device->cuda_device_context->Wait(); + } #endif -} - -void ExecutorImpl::Initialize() { - // Initialize the ProgramDescView - view_->Initialize(program_desc_); + } } } // namespace framework diff --git a/paddle/framework/executor.h b/paddle/framework/executor.h index 25ef2d4d48..5d6d7f37a6 100644 --- a/paddle/framework/executor.h +++ b/paddle/framework/executor.h @@ -15,18 +15,22 @@ limitations under the License. */ #pragma once #include "paddle/framework/framework.pb.h" -#include "paddle/platform/place.h" +#include "paddle/framework/scope.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/device.h" namespace paddle { namespace framework { class Executor { public: - virtual ~Executor() {} - virtual void Run() = 0; -}; + explicit Executor(const std::vector& places); + ~Executor() {} + void Run(const ProgramDesc&, Scope*, std::vector*); -Executor* NewLocalExecutor(const platform::Place&, const ProgramDesc&, bool); + private: + std::vector devices_; +}; } // namespace framework } // namespace paddle diff --git a/paddle/framework/executor_test.cc b/paddle/framework/executor_test.cc index 6f8ca38768..51d2dfc1c3 100644 --- a/paddle/framework/executor_test.cc +++ b/paddle/framework/executor_test.cc @@ -19,9 +19,15 @@ using namespace paddle::platform; using namespace paddle::framework; TEST(Executor, Init) { + CPUPlace cpu_place1, cpu_place2; + std::vector places; + places.push_back(cpu_place1); + 
places.push_back(cpu_place2); + Executor* executor = new Executor(places); + ProgramDesc pdesc; - CPUPlace cpu_place; - Executor* executor = NewLocalExecutor(cpu_place, pdesc, true); - executor->Run(); + Scope s; + std::vector* outputs{nullptr}; + executor->Run(pdesc, &s, outputs); delete executor; } \ No newline at end of file diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index daf519b91d..b581937393 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -23,5 +23,7 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info) +cc_library(device SRCS device.cc DEPS device_context) + nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context) diff --git a/paddle/platform/device.cc b/paddle/platform/device.cc new file mode 100644 index 0000000000..7acd87c8c3 --- /dev/null +++ b/paddle/platform/device.cc @@ -0,0 +1,59 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/platform/device.h" + +namespace paddle { +namespace platform { + +template +std::unique_ptr make_unique(Args&&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} + +CPUDeviceContext* GetCPUDeviceContext(const CPUPlace& place) { + static std::unique_ptr g_cpu_device_context = + make_unique(place); + return g_cpu_device_context.get(); +} + +#ifndef PADDLE_ONLY_CPU +CUDADeviceContext* GetCUDADeviceContext(const GPUPlace& place) { + static std::unique_ptr g_cuda_device_context = + make_unique(place); + return g_cuda_device_context.get(); +} +#endif + +Device* GetDevice(const Place& place) { + CPUPlace cpu_place; +#ifndef PADDLE_ONLY_CPU + if (is_gpu_place(place)) { + GPUPlace gpu_place = boost::get(place); + static std::unique_ptr g_device = make_unique( + GetCPUDeviceContext(cpu_place), GetCUDADeviceContext(gpu_place)); + return g_device.get(); + } else { + static std::unique_ptr g_device = + make_unique(GetCPUDeviceContext(cpu_place), nullptr); + return g_device.get(); + } +#else + static std::unique_ptr g_device = + make_unique(GetCPUDeviceContext(cpu_place)); + return g_device.get(); +#endif +} +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/device.h b/paddle/platform/device.h new file mode 100644 index 0000000000..b1bb8073cf --- /dev/null +++ b/paddle/platform/device.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/platform/device_context.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace platform { + +struct Device { + CPUDeviceContext* cpu_device_context; +#ifndef PADDLE_ONLY_CPU + CUDADeviceContext* cuda_device_context; +#endif + +#ifndef PADDLE_ONLY_CPU + Device(CPUDeviceContext* cpu, CUDADeviceContext* gpu) + : cpu_device_context(cpu), cuda_device_context(gpu) {} +#else + explicit Device(CPUDeviceContext* cpu) : cpu_device_context(cpu) {} +#endif +}; + +CPUDeviceContext* GetCPUDeviceContext(const platform::CPUPlace& place); + +#ifndef PADDLE_ONLY_CPU +CUDADeviceContext* GetCUDADeviceContext(const platform::GPUPlace& place); +#endif + +Device* GetDevice(const platform::Place& place); +} // namespace platform +} // namespace paddle