From 6cfb9a326251dcf9fe7aeef14ac4f3ff56d23111 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 31 Jul 2017 20:21:04 +0800 Subject: [PATCH 01/25] Refine InferShape for recurrent_network_op. * the tensor only contains shape and does not hold memory when inferring shape. --- paddle/operators/recurrent_network_op.cc | 147 +++++++----------- paddle/operators/recurrent_network_op.h | 16 +- paddle/operators/recurrent_network_op_test.cc | 33 ++-- 3 files changed, 84 insertions(+), 112 deletions(-) diff --git a/paddle/operators/recurrent_network_op.cc b/paddle/operators/recurrent_network_op.cc index 1a101d6ddf..b21a21c6e9 100644 --- a/paddle/operators/recurrent_network_op.cc +++ b/paddle/operators/recurrent_network_op.cc @@ -29,7 +29,8 @@ namespace rnn { void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, - const size_t seq_len) { + const size_t seq_len, + bool infer_shape) { PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); for (size_t i = 0; i < inlinks.size(); ++i) { Tensor* input = @@ -42,7 +43,9 @@ void SegmentInputs(std::vector>& step_scopes, Tensor* step_input = step_scopes[j] ->CreateVariable(inlinks[i].internal) ->GetMutable(); - *step_input = input->Slice(j, j + 1); + if (!infer_shape) { + *step_input = input->Slice(j, j + 1); + } step_input->Resize(step_dims); } } @@ -50,20 +53,23 @@ void SegmentInputs(std::vector>& step_scopes, void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, - const size_t seq_len) { + const size_t seq_len, + bool infer_shape) { for (size_t i = 0; i < outlinks.size(); i++) { Tensor* output = step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - // TODO(qingiqng) remove following code after adding - // InferShape in RecurrentGradientOp - DDim step_dims = step_scopes[0] - ->GetVariable(outlinks[i].internal) - ->GetMutable() - ->dims(); - std::vector dims_vec = vectorize(step_dims); - dims_vec.insert(dims_vec.begin(), seq_len); - output->mutable_data(make_ddim(dims_vec), platform::CPUPlace()); + if (infer_shape) { + DDim step_dims = step_scopes[0] + ->GetVariable(outlinks[i].internal) + ->GetMutable() + ->dims(); + std::vector dims_vec = vectorize(step_dims); + dims_vec.insert(dims_vec.begin(), seq_len); + output->Resize(make_ddim(dims_vec)); + } else { + output->mutable_data(platform::CPUPlace()); + } for (size_t j = 0; j < seq_len; j++) { Tensor* step_output = step_scopes[j] @@ -79,8 +85,9 @@ void ConcatOutputs(std::vector>& step_scopes, void LinkMemories(std::vector>& scopes, const std::vector& memories, - size_t step_id, - int offset) { + const size_t step_id, + const int offset, + bool infer_shape) { PADDLE_ENFORCE(step_id < scopes.size(), "step [%d] is out of range of step scopes' size [%d]", step_id, @@ -97,18 +104,14 @@ void LinkMemories(std::vector>& scopes, std::shared_ptr scope = scopes[step_id]; std::shared_ptr linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { - auto mem = scope->CreateVariable(attr.pre_var)->GetMutable(); + auto mem = scope->GetVariable(attr.pre_var)->GetMutable(); // maybe share variable is better? auto linked_mem = linked_scope->GetVariable(attr.var)->GetMutable(); - mem->ShareDataWith(*linked_mem); - - // TODO(qingqing) remove following code - // the memory of current step should be allocated in step net - auto m = scope->CreateVariable(attr.var)->GetMutable(); - // for unit test, as addOp and mulOp are null currently, if not - // mutable_data, mem.data() in output will be error. 
We will - // remove this line after merge the correct addOp and mulOp. - m->mutable_data(mem->dims(), platform::CPUPlace()); + if (infer_shape) { + mem->Resize(linked_mem->dims()); + } else { + mem->ShareDataWith(*linked_mem); + } } } @@ -176,61 +179,43 @@ void RecurrentAlgorithm::InferShape(const std::shared_ptr& scope) const { ->GetMutable() ->dims()[0]; CreateScopes(scope); - auto step_scopes = GetStepScopes(scope); - // SegmentInputs is called in InferShape. The input must hold memory in - // SegmentInputs. But the other op only set dimension for the output in - // InferShape. That's a problem. Wether the RNN op needs InferShape or not? - // Wether the following functions (SegmentInputs, InitMemories, ...) need - // to rewrite for RNN op? - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + auto step_scopes = GetStepScopes(scope); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); - InitMemories(step_scopes[0]); + InitMemories(step_scopes[0], true); PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), "stepnet [%s] is not in scope.", arg_->step_net); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); - // If the InferShape is called in OperatorBase's run function, - // the rnn op only needs to do InferShape for the first time step for (size_t i = 0; i < seq_len_; i++) { if (i > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, i, -1); + rnn::LinkMemories(step_scopes, arg_->memories, i, -1, true); } net->GetMutable()->InferShape(step_scopes[i]); } - - auto outlinks = arg_->outlinks; - for (size_t i = 0; i < outlinks.size(); i++) { - DDim step_dims = step_scopes[0] - ->GetVariable(outlinks[i].internal) - ->GetMutable() - ->dims(); - std::vector dims_vec = vectorize(step_dims); - // now only support fixed length - dims_vec.insert(dims_vec.begin(), seq_len_); - Tensor* output = - step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - output->Resize(make_ddim(dims_vec)); - } + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); } void RecurrentAlgorithm::Run(const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); + + InitMemories(step_scopes[0], false); + Variable* net = scope->GetVariable(arg_->step_net); for (size_t step_id = 0; step_id < seq_len_; step_id++) { - // the link memory is done in InferShape - // maybe remove following code after testing if (step_id > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1); + rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, false); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); } void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { @@ -246,6 +231,7 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { // Now all variables in scope must be created outside of op. 
auto net_op = scope->GetVariable(arg_->step_net)->GetMutable(); for (auto& input : net_op->inputs_) { + // the weight are located in parent scope step_scope->CreateVariable(input); } for (auto& output : net_op->outputs_) { @@ -257,7 +243,8 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { } } -void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope) const { +void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope, + bool infer_shape) const { for (auto& attr : arg_->memories) { Tensor* pre_mem = step_scope->CreateVariable(attr.pre_var)->GetMutable(); @@ -267,14 +254,11 @@ void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope) const { attr.boot_var); Tensor* boot_mem = step_scope->GetVariable(attr.boot_var)->GetMutable(); - pre_mem->ShareDataWith(*boot_mem); - - // TODO(qingqing) remove following code - // the memory of current step should be allocated in step net - // here for unit test - auto cur_step_mem = - step_scope->CreateVariable(attr.var)->GetMutable(); - cur_step_mem->mutable_data(boot_mem->dims(), platform::CPUPlace()); + if (infer_shape) { + pre_mem->Resize(boot_mem->dims()); + } else { + pre_mem->ShareDataWith(*boot_mem); + } } } @@ -336,35 +320,37 @@ void RecurrentGradientAlgorithm::Run( const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), "step net is not in scope."); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); + rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, false); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } - LinkBootMemoryGradients(step_scopes[0]); - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_); + LinkBootMemoryGradients(step_scopes[0], false); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); } void RecurrentGradientAlgorithm::LinkBootMemoryGradients( - std::shared_ptr step_scope) const { + std::shared_ptr step_scope, bool infer_shape) const { for (auto& attr : arg_->memories) { Tensor* mem_grad = step_scope->CreateVariable(attr.var)->GetMutable(); - PADDLE_ENFORCE(mem_grad != nullptr, - "boot_tensor should be retrieved before"); PADDLE_ENFORCE(step_scope->HasVariable(attr.boot_var), "memory [%s]'s boot variable [%s] not exists", attr.var, attr.boot_var); Tensor* boot_mem_grad = step_scope->CreateVariable(attr.boot_var)->GetMutable(); - boot_mem_grad->ShareDataWith(*mem_grad); + if (infer_shape) { + boot_mem_grad->Resize(mem_grad->dims()); + } else { + boot_mem_grad->ShareDataWith(*mem_grad); + } } } @@ -374,7 +360,7 @@ void RecurrentGradientAlgorithm::InferShape( ->GetMutable() ->dims()[0]; auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_); + rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), "step net is not in scope."); @@ -383,25 +369,12 @@ void RecurrentGradientAlgorithm::InferShape( for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1); + rnn::LinkMemories(step_scopes, 
arg_->memories, step_id, 1, true); } net->GetMutable()->InferShape(step_scopes[step_id]); } - - auto outlinks = arg_->outlinks; - for (size_t i = 0; i < outlinks.size(); i++) { - DDim step_dims = step_scopes[0] - ->GetVariable(outlinks[i].internal) - ->GetMutable() - ->dims(); - std::vector dims_vec = vectorize(step_dims); - // now only support fixed length - dims_vec.insert(dims_vec.begin(), seq_len_); - Tensor* output = - step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - output->Resize(make_ddim(dims_vec)); - } - LinkBootMemoryGradients(step_scopes[0]); + rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); + LinkBootMemoryGradients(step_scopes[0], true); } void RecurrentGradientOp::Init() { diff --git a/paddle/operators/recurrent_network_op.h b/paddle/operators/recurrent_network_op.h index 8946c8ce38..87a997b82e 100644 --- a/paddle/operators/recurrent_network_op.h +++ b/paddle/operators/recurrent_network_op.h @@ -72,19 +72,22 @@ struct ArgumentName { */ void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, - const size_t seq_len); + const size_t seq_len, + bool infer_shape); /** * Process outputs of step nets and merge to variables. */ void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, - const size_t seq_len); + const size_t seq_len, + bool infer_shape); void LinkMemories(std::vector>& step_scopes, const std::vector& memories, - size_t step_id, - int offset); + const size_t step_id, + const int offset, + bool infer_shape); void InitArgument(const ArgumentName& name, Argument* arg); @@ -125,7 +128,7 @@ protected: ->GetMutable>>(); } - void InitMemories(std::shared_ptr step_scopes) const; + void InitMemories(std::shared_ptr step_scopes, bool infer_shape) const; private: std::unique_ptr arg_; @@ -149,7 +152,8 @@ public: void Run(const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const; - void LinkBootMemoryGradients(std::shared_ptr step_scopes) const; + void LinkBootMemoryGradients(std::shared_ptr step_scopes, + bool infer_shape) const; /** * InferShape must be called before Run. 
diff --git a/paddle/operators/recurrent_network_op_test.cc b/paddle/operators/recurrent_network_op_test.cc index 6784ac6001..86588a969c 100644 --- a/paddle/operators/recurrent_network_op_test.cc +++ b/paddle/operators/recurrent_network_op_test.cc @@ -56,7 +56,7 @@ protected: w->GetMutable()->mutable_data( make_ddim(std::vector{30, 30}), platform::CPUPlace()); - for (auto boot : std::vector{"x_boot", "h_boot"}) { + for (auto boot : std::vector{"h_boot"}) { LOG(INFO) << "create global variable " << boot; Variable* h_boot = scope_->CreateVariable(boot); h_boot->GetMutable()->mutable_data( @@ -80,7 +80,6 @@ protected: op_desc.add_inputs("x0"); op_desc.add_inputs("x1"); // boot_memories 3 - op_desc.add_inputs("x_boot"); op_desc.add_inputs("h_boot"); // step net 5 op_desc.add_inputs("step_net"); @@ -92,7 +91,7 @@ protected: auto _input_format = std::vector{ 0, // in_link 3, // memories - 5 // step_net + 4 // step_net }; auto input_format = op_desc.add_attrs(); input_format->set_name("input_format"); @@ -130,12 +129,11 @@ protected: inlink_alias->add_strings(item); } // pre memories - for (const auto& item : - std::vector{"rnn/x@pre", "rnn/h@pre"}) { + for (const auto& item : std::vector{"rnn/h@pre"}) { pre_memories->add_strings(item); } // memories - for (const auto& item : std::vector{"rnn/x", "rnn/h"}) { + for (const auto& item : std::vector{"rnn/h"}) { memories->add_strings(item); } // output alias @@ -152,14 +150,11 @@ protected: LOG(INFO) << "create variable step_net"; Variable* var = scope_->CreateVariable("step_net"); auto net = var->GetMutable(); - // rnn/s is net's input or output? - net->inputs_ = {"rnn/h@pre", "rnn/w", "rnn/x"}; - net->inputs_ = {"rnn/s", "rnn/h"}; net->AddOp( OpRegistry::CreateOp("mul", {"rnn/h@pre", "rnn/w"}, {"rnn/s"}, {})); net->AddOp( - OpRegistry::CreateOp("add_two", {"rnn/x", "rnn/s"}, {"rnn/h"}, {})); + OpRegistry::CreateOp("add_two", {"x@alias", "rnn/s"}, {"rnn/h"}, {})); net->CompleteAddOp(); } @@ -303,7 +298,7 @@ protected: std::vector>* step_scopes = scope_->GetVariable("step_scopes") ->GetMutable>>(); - rnn::SegmentInputs(*step_scopes, std::vector{inlink}, 10); + rnn::SegmentInputs(*step_scopes, std::vector{inlink}, 10, true); } void LinkeMemories() { @@ -318,7 +313,7 @@ protected: scope_->GetVariable("step_scopes") ->GetMutable>>(); for (int i = 1; i < 10; ++i) { - rnn::LinkMemories(*step_scopes, memories, i, -1); + rnn::LinkMemories(*step_scopes, memories, i, -1, true); } } @@ -347,7 +342,7 @@ TEST(RecurrentOp, LinkMemories) { scope->CreateVariable("pre_h"); auto tensor = scope->CreateVariable("h")->GetMutable(); float* data = tensor->mutable_data(make_ddim({15, 20}), CPUPlace()); - for (int i = 0; i < 15 * 20; ++i) { + for (int j = 0; j < 15 * 20; ++j) { data[i] = rand() * (1. 
/ (double)RAND_MAX); } step_scopes.push_back(scope); @@ -362,7 +357,7 @@ TEST(RecurrentOp, LinkMemories) { memories.push_back(mem_attr); for (int i = 1; i < len; ++i) { - rnn::LinkMemories(step_scopes, memories, i, -1); + rnn::LinkMemories(step_scopes, memories, i, -1, false); } // check for (int i = 0; i < len - 1; ++i) { @@ -372,13 +367,13 @@ TEST(RecurrentOp, LinkMemories) { ->GetVariable("pre_h") ->GetMutable() ->data(); - for (size_t i = 0; i < 15 * 20; ++i) { - ASSERT_FLOAT_EQ(a[i], b[i]); + for (size_t j = 0; j < 15 * 20; ++j) { + ASSERT_FLOAT_EQ(a[j], b[j]); } } for (int i = len - 2; i >= 0; --i) { - rnn::LinkMemories(step_scopes, memories, i, 1); + rnn::LinkMemories(step_scopes, memories, i, 1, false); } // check for (int i = len - 2; i >= 0; --i) { @@ -390,8 +385,8 @@ TEST(RecurrentOp, LinkMemories) { ->GetVariable("h") ->GetMutable() ->data(); - for (size_t i = 0; i < 15 * 20; ++i) { - ASSERT_FLOAT_EQ(a[i], b[i]); + for (size_t j = 0; j < 15 * 20; ++j) { + ASSERT_FLOAT_EQ(a[j], b[j]); } } } From 8925295a4b63dd6dc95b95b909be0ef4e2c5f4b0 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 1 Aug 2017 16:34:59 +0800 Subject: [PATCH 02/25] follow comments. --- paddle/operators/recurrent_network_op.cc | 111 +++++++++--------- paddle/operators/recurrent_network_op.h | 11 +- paddle/operators/recurrent_network_op_test.cc | 14 ++- 3 files changed, 69 insertions(+), 67 deletions(-) diff --git a/paddle/operators/recurrent_network_op.cc b/paddle/operators/recurrent_network_op.cc index b21a21c6e9..dcb1ac19d2 100644 --- a/paddle/operators/recurrent_network_op.cc +++ b/paddle/operators/recurrent_network_op.cc @@ -30,11 +30,14 @@ namespace rnn { void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, const size_t seq_len, - bool infer_shape) { + bool infer_shape_mode) { PADDLE_ENFORCE(!inlinks.empty(), "no in links are provided."); for (size_t i = 0; i < inlinks.size(); ++i) { - Tensor* input = - step_scopes[0]->GetVariable(inlinks[i].external)->GetMutable(); + auto input_var = step_scopes[0]->GetVariable(inlinks[i].external); + PADDLE_ENFORCE(input_var != nullptr, + "input link [%s] is not in scope.", + inlinks[i].external); + Tensor* input = input_var->GetMutable(); DDim dims = input->dims(); PADDLE_ENFORCE(static_cast(dims[0]) == seq_len, "all the inlinks must have same length"); @@ -43,7 +46,7 @@ void SegmentInputs(std::vector>& step_scopes, Tensor* step_input = step_scopes[j] ->CreateVariable(inlinks[i].internal) ->GetMutable(); - if (!infer_shape) { + if (!infer_shape_mode) { *step_input = input->Slice(j, j + 1); } step_input->Resize(step_dims); @@ -54,12 +57,14 @@ void SegmentInputs(std::vector>& step_scopes, void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, const size_t seq_len, - bool infer_shape) { + bool infer_shape_mode) { for (size_t i = 0; i < outlinks.size(); i++) { + PADDLE_ENFORCE(step_scopes[0]->HasVariable(outlinks[i].external), + "output link [%s] is not in scope.", + outlinks[i].external); Tensor* output = step_scopes[0]->GetVariable(outlinks[i].external)->GetMutable(); - - if (infer_shape) { + if (infer_shape_mode) { DDim step_dims = step_scopes[0] ->GetVariable(outlinks[i].internal) ->GetMutable() @@ -69,16 +74,15 @@ void ConcatOutputs(std::vector>& step_scopes, output->Resize(make_ddim(dims_vec)); } else { output->mutable_data(platform::CPUPlace()); - } - - for (size_t j = 0; j < seq_len; j++) { - Tensor* step_output = step_scopes[j] - ->GetVariable(outlinks[i].internal) - ->GetMutable(); - // TODO(luotao02) data type and 
platform::DeviceContext() should set - // correctly - (output->Slice(j, j + 1)) - .CopyFrom(*step_output, platform::CPUPlace()); + for (size_t j = 0; j < seq_len; j++) { + Tensor* step_output = step_scopes[j] + ->GetVariable(outlinks[i].internal) + ->GetMutable(); + // TODO(luotao02) data type and platform::DeviceContext() should set + // correctly + (output->Slice(j, j + 1)) + .CopyFrom(*step_output, platform::CPUPlace()); + } } } } @@ -87,7 +91,7 @@ void LinkMemories(std::vector>& scopes, const std::vector& memories, const size_t step_id, const int offset, - bool infer_shape) { + bool infer_shape_mode) { PADDLE_ENFORCE(step_id < scopes.size(), "step [%d] is out of range of step scopes' size [%d]", step_id, @@ -107,7 +111,7 @@ void LinkMemories(std::vector>& scopes, auto mem = scope->GetVariable(attr.pre_var)->GetMutable(); // maybe share variable is better? auto linked_mem = linked_scope->GetVariable(attr.var)->GetMutable(); - if (infer_shape) { + if (infer_shape_mode) { mem->Resize(linked_mem->dims()); } else { mem->ShareDataWith(*linked_mem); @@ -179,43 +183,39 @@ void RecurrentAlgorithm::InferShape(const std::shared_ptr& scope) const { ->GetMutable() ->dims()[0]; CreateScopes(scope); - auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); - - InitMemories(step_scopes[0], true); - - PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), - "stepnet [%s] is not in scope.", - arg_->step_net); + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, true /*infer_shape_mode*/); + InitMemories(step_scopes[0], true /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); for (size_t i = 0; i < seq_len_; i++) { if (i > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, i, -1, true); + rnn::LinkMemories( + step_scopes, arg_->memories, i, -1, true /*infer_shape_mode*/); } net->GetMutable()->InferShape(step_scopes[i]); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); } void RecurrentAlgorithm::Run(const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); - - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); - - InitMemories(step_scopes[0], false); - + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, false /*infer_shape_mode*/); + InitMemories(step_scopes[0], false /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); for (size_t step_id = 0; step_id < seq_len_; step_id++) { if (step_id > 0) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, -1, false); + rnn::LinkMemories( + step_scopes, arg_->memories, step_id, -1, false /*infer_shape_mode*/); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } - - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/); } void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { @@ -227,7 +227,6 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { if (seq_len_ > step_scopes->size()) { for (size_t i = step_scopes->size(); i < seq_len_; ++i) { std::shared_ptr step_scope = std::make_shared(scope); - // Now all variables in scope must be created outside of op. 
auto net_op = scope->GetVariable(arg_->step_net)->GetMutable(); for (auto& input : net_op->inputs_) { @@ -237,14 +236,13 @@ void RecurrentAlgorithm::CreateScopes(std::shared_ptr scope) const { for (auto& output : net_op->outputs_) { step_scope->CreateVariable(output); } - step_scopes->push_back(std::make_shared(step_scope)); } } } void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope, - bool infer_shape) const { + bool infer_shape_mode) const { for (auto& attr : arg_->memories) { Tensor* pre_mem = step_scope->CreateVariable(attr.pre_var)->GetMutable(); @@ -254,7 +252,7 @@ void RecurrentAlgorithm::InitMemories(std::shared_ptr step_scope, attr.boot_var); Tensor* boot_mem = step_scope->GetVariable(attr.boot_var)->GetMutable(); - if (infer_shape) { + if (infer_shape_mode) { pre_mem->Resize(boot_mem->dims()); } else { pre_mem->ShareDataWith(*boot_mem); @@ -320,23 +318,23 @@ void RecurrentGradientAlgorithm::Run( const std::shared_ptr& scope, const platform::DeviceContext& dev_ctx) const { auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, false); - PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), - "step net is not in scope."); + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, false /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); - PADDLE_ENFORCE(net != nullptr, "failed to get step net"); for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, false); + rnn::LinkMemories( + step_scopes, arg_->memories, step_id, 1, false /*infer_shape_mode*/); } net->GetMutable()->Run(step_scopes[step_id], dev_ctx); } LinkBootMemoryGradients(step_scopes[0], false); - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, false); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, false /*infer_shape_mode*/); } void RecurrentGradientAlgorithm::LinkBootMemoryGradients( - std::shared_ptr step_scope, bool infer_shape) const { + std::shared_ptr step_scope, bool infer_shape_mode) const { for (auto& attr : arg_->memories) { Tensor* mem_grad = step_scope->CreateVariable(attr.var)->GetMutable(); @@ -346,7 +344,7 @@ void RecurrentGradientAlgorithm::LinkBootMemoryGradients( attr.boot_var); Tensor* boot_mem_grad = step_scope->CreateVariable(attr.boot_var)->GetMutable(); - if (infer_shape) { + if (infer_shape_mode) { boot_mem_grad->Resize(mem_grad->dims()); } else { boot_mem_grad->ShareDataWith(*mem_grad); @@ -360,21 +358,20 @@ void RecurrentGradientAlgorithm::InferShape( ->GetMutable() ->dims()[0]; auto step_scopes = GetStepScopes(scope); - rnn::SegmentInputs(step_scopes, arg_->inlinks, seq_len_, true); - - PADDLE_ENFORCE(scope->HasVariable(arg_->step_net), - "step net is not in scope."); + rnn::SegmentInputs( + step_scopes, arg_->inlinks, seq_len_, true /*infer_shape_mode*/); Variable* net = scope->GetVariable(arg_->step_net); PADDLE_ENFORCE(net != nullptr, "failed to get step net"); - for (int step_id = seq_len_ - 1; step_id >= 0; --step_id) { if (static_cast(step_id) != seq_len_ - 1) { - rnn::LinkMemories(step_scopes, arg_->memories, step_id, 1, true); + rnn::LinkMemories( + step_scopes, arg_->memories, step_id, 1, true /*infer_shape_mode*/); } net->GetMutable()->InferShape(step_scopes[step_id]); } - rnn::ConcatOutputs(step_scopes, arg_->outlinks, seq_len_, true); - LinkBootMemoryGradients(step_scopes[0], true); + rnn::ConcatOutputs( + step_scopes, arg_->outlinks, seq_len_, true /*infer_shape_mode*/); 
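// The infer_shape_mode flag threaded through SegmentInputs, ConcatOutputs and
// LinkMemories above follows a single contract: in shape-inference mode only
// tensor dimensions are propagated, while at run time the underlying buffers
// are sliced, shared or copied. A minimal sketch of that contract, using an
// illustrative helper name that is not taken from this patch:
//
//   void PropagateMemory(Tensor* dst, const Tensor* src, bool infer_shape_mode) {
//     if (infer_shape_mode) {
//       dst->Resize(src->dims());      // shape only, no memory is touched
//     } else {
//       dst->ShareDataWith(*src);      // run time: reuse the linked buffer
//     }
//   }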
+ LinkBootMemoryGradients(step_scopes[0], true /*infer_shape_mode*/); } void RecurrentGradientOp::Init() { diff --git a/paddle/operators/recurrent_network_op.h b/paddle/operators/recurrent_network_op.h index 87a997b82e..3f722d5608 100644 --- a/paddle/operators/recurrent_network_op.h +++ b/paddle/operators/recurrent_network_op.h @@ -73,7 +73,7 @@ struct ArgumentName { void SegmentInputs(std::vector>& step_scopes, const std::vector& inlinks, const size_t seq_len, - bool infer_shape); + bool infer_shape_mode); /** * Process outputs of step nets and merge to variables. @@ -81,13 +81,13 @@ void SegmentInputs(std::vector>& step_scopes, void ConcatOutputs(std::vector>& step_scopes, const std::vector& outlinks, const size_t seq_len, - bool infer_shape); + bool infer_shape_mode); void LinkMemories(std::vector>& step_scopes, const std::vector& memories, const size_t step_id, const int offset, - bool infer_shape); + bool infer_shape_mode); void InitArgument(const ArgumentName& name, Argument* arg); @@ -128,7 +128,8 @@ protected: ->GetMutable>>(); } - void InitMemories(std::shared_ptr step_scopes, bool infer_shape) const; + void InitMemories(std::shared_ptr step_scopes, + bool infer_shape_mode) const; private: std::unique_ptr arg_; @@ -153,7 +154,7 @@ public: const platform::DeviceContext& dev_ctx) const; void LinkBootMemoryGradients(std::shared_ptr step_scopes, - bool infer_shape) const; + bool infer_shape_mode) const; /** * InferShape must be called before Run. diff --git a/paddle/operators/recurrent_network_op_test.cc b/paddle/operators/recurrent_network_op_test.cc index 86588a969c..635c2fe038 100644 --- a/paddle/operators/recurrent_network_op_test.cc +++ b/paddle/operators/recurrent_network_op_test.cc @@ -298,7 +298,10 @@ protected: std::vector>* step_scopes = scope_->GetVariable("step_scopes") ->GetMutable>>(); - rnn::SegmentInputs(*step_scopes, std::vector{inlink}, 10, true); + rnn::SegmentInputs(*step_scopes, + std::vector{inlink}, + 10, + true /*infer_shape_mode*/); } void LinkeMemories() { @@ -313,7 +316,8 @@ protected: scope_->GetVariable("step_scopes") ->GetMutable>>(); for (int i = 1; i < 10; ++i) { - rnn::LinkMemories(*step_scopes, memories, i, -1, true); + rnn::LinkMemories( + *step_scopes, memories, i, -1, true /*infer_shape_mode*/); } } @@ -343,7 +347,7 @@ TEST(RecurrentOp, LinkMemories) { auto tensor = scope->CreateVariable("h")->GetMutable(); float* data = tensor->mutable_data(make_ddim({15, 20}), CPUPlace()); for (int j = 0; j < 15 * 20; ++j) { - data[i] = rand() * (1. / (double)RAND_MAX); + data[j] = rand() * (1. / (double)RAND_MAX); } step_scopes.push_back(scope); } @@ -357,7 +361,7 @@ TEST(RecurrentOp, LinkMemories) { memories.push_back(mem_attr); for (int i = 1; i < len; ++i) { - rnn::LinkMemories(step_scopes, memories, i, -1, false); + rnn::LinkMemories(step_scopes, memories, i, -1, false /*infer_shape_mode*/); } // check for (int i = 0; i < len - 1; ++i) { @@ -373,7 +377,7 @@ TEST(RecurrentOp, LinkMemories) { } for (int i = len - 2; i >= 0; --i) { - rnn::LinkMemories(step_scopes, memories, i, 1, false); + rnn::LinkMemories(step_scopes, memories, i, 1, false /*infer_shape_mode*/); } // check for (int i = len - 2; i >= 0; --i) { From b94584cf4b70dc9074779b512f8e4eb14ad032e0 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Tue, 1 Aug 2017 17:18:09 +0800 Subject: [PATCH 03/25] Rename recurrent_network_op recurrent_op. 
--- paddle/operators/CMakeLists.txt | 6 ++---- .../{recurrent_network_op.cc => recurrent_op.cc} | 13 +++++++++---- .../{recurrent_network_op.h => recurrent_op.h} | 0 ...rent_network_op_test.cc => recurrent_op_test.cc} | 2 +- paddle/pybind/CMakeLists.txt | 2 +- 5 files changed, 13 insertions(+), 10 deletions(-) rename paddle/operators/{recurrent_network_op.cc => recurrent_op.cc} (97%) rename paddle/operators/{recurrent_network_op.h => recurrent_op.h} (100%) rename paddle/operators/{recurrent_network_op_test.cc => recurrent_op_test.cc} (99%) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 5085e1b925..9d28404f68 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -55,7 +55,5 @@ op_library(fc_op SRCS fc_op.cc DEPS mul_op rowwise_add_op sigmoid_op op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) -op_library(recurrent_network_op SRCS recurrent_network_op.cc DEPS op_desc -tensor op_registry operator net) -cc_test(recurrent_network_op_test SRCS recurrent_network_op_test.cc DEPS -recurrent_network_op gtest mul_op add_op) +op_library(recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net) +cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) diff --git a/paddle/operators/recurrent_network_op.cc b/paddle/operators/recurrent_op.cc similarity index 97% rename from paddle/operators/recurrent_network_op.cc rename to paddle/operators/recurrent_op.cc index dcb1ac19d2..b3132c2020 100644 --- a/paddle/operators/recurrent_network_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/operators/recurrent_network_op.h" +#include "paddle/operators/recurrent_op.h" #include #include @@ -108,8 +108,13 @@ void LinkMemories(std::vector>& scopes, std::shared_ptr scope = scopes[step_id]; std::shared_ptr linked_scope = scopes[step_id + offset]; for (auto& attr : memories) { + PADDLE_ENFORCE(scope->HasVariable(attr.pre_var), + "the pre-memory [%s] is not in scope.", + attr.pre_var); + PADDLE_ENFORCE(linked_scope->HasVariable(attr.var), + "the memory [%s] is not in linked scope.", + attr.var); auto mem = scope->GetVariable(attr.pre_var)->GetMutable(); - // maybe share variable is better? 
auto linked_mem = linked_scope->GetVariable(attr.var)->GetMutable(); if (infer_shape_mode) { mem->Resize(linked_mem->dims()); @@ -295,12 +300,12 @@ public: const auto& name = RecurrentOp::kArgName; // inputs and outputs stored in proto AddInputs(name.inlinks, - "the input that need to be segmented for each step."); + "the inputs that need to be segmented for each step."); AddInputs(name.boot_memories, "variables to initialize memories."); AddInput(name.step_net, "network shared by all steps."); AddOutputs(name.outlinks, - "the output that need to concated for all steps."); + "the outputs that need to concated for all steps."); AddOutput(name.step_scopes, "step scopes"); // Attributes stored in AttributeMap diff --git a/paddle/operators/recurrent_network_op.h b/paddle/operators/recurrent_op.h similarity index 100% rename from paddle/operators/recurrent_network_op.h rename to paddle/operators/recurrent_op.h diff --git a/paddle/operators/recurrent_network_op_test.cc b/paddle/operators/recurrent_op_test.cc similarity index 99% rename from paddle/operators/recurrent_network_op_test.cc rename to paddle/operators/recurrent_op_test.cc index 635c2fe038..4bff8a0ed6 100644 --- a/paddle/operators/recurrent_network_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -18,7 +18,7 @@ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/tensor.h" -#include "paddle/operators/recurrent_network_op.h" +#include "paddle/operators/recurrent_op.h" namespace paddle { namespace operators { diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 7d0e68a8f3..43d8e17ec1 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python - add_op fc_op sgd_op cross_entropy_op recurrent_network_op) + add_op fc_op sgd_op cross_entropy_op recurrent_op) From adfbf9cd68f04d5cb232cfc432895a3ed5e16e8e Mon Sep 17 00:00:00 2001 From: zhanghaichao Date: Tue, 1 Aug 2017 16:22:45 -0700 Subject: [PATCH 04/25] error fix on the front page --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2a6beeb342..b9793c3eab 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ We provide [English](http://doc.paddlepaddle.org/develop/doc/) and - [Deep Learning 101](http://book.paddlepaddle.org/index.html) - You might want to start from the this online interactive book that can run in Jupyter Notebook. + You might want to start from this online interactive book that can run in Jupyter Notebook. 
- [Distributed Training](http://doc.paddlepaddle.org/develop/doc/howto/usage/cluster/cluster_train_en.html) From 1bd64f18edf5bae153c27818c8dabb3d1d931e5e Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 2 Aug 2017 15:59:52 +0800 Subject: [PATCH 05/25] move MKLDNN and MKLML install path to build third party path and disable both when build doc and MacOS --- cmake/external/mkldnn.cmake | 45 +++++++++++++----------------- cmake/external/mklml.cmake | 18 +++++++----- paddle/scripts/travis/build_doc.sh | 4 +-- 3 files changed, 32 insertions(+), 35 deletions(-) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index eff15de73f..33988c66b2 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -20,28 +20,23 @@ INCLUDE(ExternalProject) SET(MKLDNN_PROJECT "extern_mkldnn") SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) -SET(MKLDNN_INSTALL_ROOT ${CMAKE_INSTALL_PREFIX}) -IF(NOT "$ENV{HOME}" STREQUAL "/root") - SET(MKLDNN_INSTALL_ROOT "$ENV{HOME}") -ENDIF() - -SET(MKLDNN_INSTALL_DIR "${MKLDNN_INSTALL_ROOT}/opt/paddle/third_party/mkldnn") -SET(MKLDNN_INCLUDE_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) +SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) +SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) -IF(WIN32) - MESSAGE(WARNING "It is not supported compiling with mkldnn in windows Paddle yet." - "Force WITH_MKLDNN=OFF") - SET(WITH_MKLDNN OFF) +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with MKLDNN in Paddle yet." + "Force WITH_MKLDNN=OFF") + SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in Windows and MacOS" FORCE) return() -ELSE(WIN32) - SET(MKLDNN_LIBRARY "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) - MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") - SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) - #SET(CMAKE_MACOSX_RPATH 1) # hold for MacOS - SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") -ENDIF(WIN32) +ENDIF() + +SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) +MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") +SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") -INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR}) +INCLUDE_DIRECTORIES(${MKLDNN_INC_DIR}) IF(${CBLAS_PROVIDER} STREQUAL "MKLML") SET(MKLDNN_DEPENDS ${MKLML_PROJECT}) @@ -57,16 +52,14 @@ ExternalProject_Add( GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" GIT_TAG "v0.9" PREFIX ${MKLDNN_SOURCES_DIR} - CONFIGURE_COMMAND mkdir -p /build - BUILD_COMMAND cd /build - && cmake .. 
-DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} -DMKLROOT=${MKLDNN_MKLROOT} - && $(MAKE) - INSTALL_COMMAND cd /build && $(MAKE) install UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} + CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} ) ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) -SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIBRARY}) +SET_PROPERTY(TARGET mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) -MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIBRARY}") +MESSAGE(STATUS "Mkldnn library: ${MKLDNN_LIB}") LIST(APPEND external_project_dependencies mkldnn) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 3f940756a4..17a1ca4ed0 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -16,19 +16,23 @@ IF(NOT ${WITH_MKLML}) return() ENDIF(NOT ${WITH_MKLML}) +IF(WIN32 OR APPLE) + MESSAGE(WARNING + "Windows or Mac is not supported with MKLML in Paddle yet." + "Force WITH_MKLML=OFF") + SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE) + return() +ENDIF() + INCLUDE(ExternalProject) SET(MKLML_PROJECT "extern_mklml") -SET(MKLML_VER "mklml_lnx_2018.0.20170425") +SET(MKLML_VER "mklml_lnx_2018.0.20170720") SET(MKLML_URL "https://github.com/01org/mkl-dnn/releases/download/v0.9/${MKLML_VER}.tgz") SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") -SET(MKLML_DST_DIR "opt/paddle/third_party/mklml") -SET(MKLML_INSTALL_ROOT "${CMAKE_INSTALL_PREFIX}") -IF(NOT "$ENV{HOME}" STREQUAL "/root") - SET(MKLML_INSTALL_ROOT "$ENV{HOME}") -ENDIF() - +SET(MKLML_DST_DIR "mklml") +SET(MKLML_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR}) SET(MKLML_ROOT ${MKLML_INSTALL_DIR}/${MKLML_VER}) SET(MKLML_INC_DIR ${MKLML_ROOT}/include) diff --git a/paddle/scripts/travis/build_doc.sh b/paddle/scripts/travis/build_doc.sh index a443851580..33fb5d84e2 100755 --- a/paddle/scripts/travis/build_doc.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -6,14 +6,14 @@ mkdir -p $TRAVIS_BUILD_DIR/build cd $TRAVIS_BUILD_DIR/build # Compile paddle binaries first -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_GOLANG=ON -DWITH_STYLE_CHECK=OFF mkdir output make -j `nproc` find .. -name '*whl' | xargs pip install # install all wheels. rm -rf * # Compile Documentation only. -cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON +cmake .. 
-DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DWITH_MKLML=OFF -DWITH_DOC=ON make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links From 18d924173f2b85cc8defd88958bc448077caf1e5 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 2 Aug 2017 19:32:45 +0800 Subject: [PATCH 06/25] Add Gradient Operator for mean --- paddle/operators/mean_op.cc | 12 +++++++++++- paddle/operators/mean_op.cu | 1 + paddle/operators/mean_op.h | 17 +++++++++++++++++ paddle/operators/type_alias.h | 1 + 4 files changed, 30 insertions(+), 1 deletion(-) diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index fe34d6ad40..78131b2680 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -33,13 +33,23 @@ public: MeanOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "The input of mean op"); - AddOutput("Out", "The output of mean op"); + AddOutput("Out", "The output of mean op").IgnoreGradient(); AddComment("Mean Operator"); } }; +class MeanGradOp : public OperatorWithKernel { +protected: + void InferShape(const InferShapeContext &ctx) const override { + ctx.Output("X" + GRAD_VAR_SUFFIX()) + ->Resize(ctx.Input("X")->dims()); + } +}; + } // namespace operators } // namespace paddle REGISTER_OP(mean, ops::MeanOp, ops::MeanOpMaker); REGISTER_OP_CPU_KERNEL(mean, ops::MeanKernel); +REGISTER_GRADIENT_OP(mean, mean_grad, ops::MeanGradOp); +REGISTER_OP_CPU_KERNEL(mean_grad, ops::MeanGradKernel); diff --git a/paddle/operators/mean_op.cu b/paddle/operators/mean_op.cu index 740157cbc5..e15de2fd0d 100644 --- a/paddle/operators/mean_op.cu +++ b/paddle/operators/mean_op.cu @@ -3,3 +3,4 @@ #include "paddle/operators/mean_op.h" REGISTER_OP_GPU_KERNEL(mean, ops::MeanKernel); +REGISTER_OP_GPU_KERNEL(mean_grad, ops::MeanGradKernel); \ No newline at end of file diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index 5f7d443751..555b45b070 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -32,5 +32,22 @@ public: } }; +template +class MeanGradKernel : public OpKernel { +public: + void Compute(const ExecutionContext& context) const override { + auto OG = context.Input("Out" + OperatorBase::GRAD_VAR_SUFFIX()); + PADDLE_ENFORCE(framework::product(OG->dims()) == 1, + "Mean Gradient should be scalar"); + auto IG = context.Output("X" + OperatorBase::GRAD_VAR_SUFFIX()); + IG->mutable_data(context.GetPlace()); + + T ig_size = (T)framework::product(IG->dims()); + + EigenVector::Flatten(*IG).device(*(context.GetEigenDevice())) = + EigenScalar::From(*OG) / ig_size; + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h index 93b62cddc8..9049ffda1d 100644 --- a/paddle/operators/type_alias.h +++ b/paddle/operators/type_alias.h @@ -51,6 +51,7 @@ using CPUPlace = platform::CPUPlace; using GPUPlace = platform::GPUPlace; using NetOp = framework::NetOp; using OpRegistry = framework::OpRegistry; +using OperatorBase = framework::OperatorBase; } // namespace operators } // namespace paddle From 3fc68f6f14afb3a0e1aadb0c06d787c2d2b75a66 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 2 Aug 2017 19:37:20 +0800 Subject: [PATCH 07/25] Move pybind.cc/tensor_bind.h to paddle::framework Fix #3171 --- paddle/CMakeLists.txt | 1 - paddle/framework/CMakeLists.txt | 9 ++ paddle/{pybind => framework}/pybind.cc | 96 ++++++++++------------ paddle/framework/tensor.h | 12 ++- paddle/{pybind => framework}/tensor_bind.h | 9 +- 
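Note on the MeanGradKernel added in the mean-operator patch above: it distributes the single output gradient evenly over the input, so every element of the input gradient (X plus the gradient suffix) receives the output gradient divided by N, where N is the number of elements of X. A small worked instance of that formula, with values chosen purely for illustration: for an input of shape {2, 2} (N = 4) and an incoming output gradient of 1.0, each element of the input gradient is 1.0 / 4 = 0.25.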
paddle/pybind/CMakeLists.txt | 9 -- 6 files changed, 61 insertions(+), 75 deletions(-) rename paddle/{pybind => framework}/pybind.cc (60%) rename paddle/{pybind => framework}/tensor_bind.h (93%) delete mode 100644 paddle/pybind/CMakeLists.txt diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 4b06966fba..f8a88cf317 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -15,7 +15,6 @@ if(Boost_FOUND) add_subdirectory(platform) add_subdirectory(framework) add_subdirectory(operators) - add_subdirectory(pybind) endif() if(WITH_C_API) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 12a3a00bba..b73426eaac 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -36,3 +36,12 @@ cc_test(net_op_test SRCS net_op_test.cc DEPS net) cc_library(backward SRCS backward.cc DEPS net) cc_test(backward_test SRCS backward_test.cc DEPS backward) +cc_library(paddle_pybind SHARED + SRCS pybind.cc + DEPS pybind python + fc_op + sgd_op + add_op + mean_op + cross_entropy_op + recurrent_network_op) diff --git a/paddle/pybind/pybind.cc b/paddle/framework/pybind.cc similarity index 60% rename from paddle/pybind/pybind.cc rename to paddle/framework/pybind.cc index 801ef50e57..a735cc2ad5 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/framework/pybind.cc @@ -20,13 +20,12 @@ limitations under the License. */ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" -#include "paddle/pybind/tensor_bind.h" +#include "paddle/framework/tensor_bind.h" #include "pybind11/numpy.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" namespace py = pybind11; -namespace pd = paddle::framework; USE_OP(add_two); USE_OP(onehot_cross_entropy); @@ -38,13 +37,14 @@ USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); USE_OP_WITHOUT_KERNEL(recurrent_op); - +namespace paddle { +namespace framework { template -void ExposeOperator(ClassType& m) { +void ExposeOperator(ClassType &m) { m.def("infer_shape", &ClassType::type::InferShape) .def("run", &ClassType::type::Run) .def("outputs", - [](const typename ClassType::type& op) -> std::vector { + [](const typename ClassType::type &op) -> std::vector { return op.outputs_; }) .def("__str__", &ClassType::type::DebugString); @@ -58,68 +58,58 @@ static size_t UniqueIntegerGenerator() { PYBIND11_PLUGIN(core) { py::module m("core", "C++ core of PaddlePaddle"); - py::class_(m, "Tensor", py::buffer_protocol()) - .def_buffer([](pd::Tensor& self) -> py::buffer_info { - return paddle::pybind::CastToPyBuffer(self); - }) + py::class_(m, "Tensor", py::buffer_protocol()) + .def_buffer( + [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) .def("get_dims", - [](const pd::Tensor& self) { return pd::vectorize(self.dims()); }) + [](const Tensor &self) { return vectorize(self.dims()); }) .def("set_dims", - [](pd::Tensor& self, const std::vector& dim) { - self.Resize(pd::make_ddim(dim)); + [](Tensor &self, const std::vector &dim) { + self.Resize(make_ddim(dim)); }) .def("alloc_float", - [](pd::Tensor& self) { + [](Tensor &self) { self.mutable_data(paddle::platform::CPUPlace()); }) .def("alloc_int", - [](pd::Tensor& self) { + [](Tensor &self) { self.mutable_data(paddle::platform::CPUPlace()); }) - .def("set", paddle::pybind::PyTensorSetFromArray) - .def("set", paddle::pybind::PyTensorSetFromArray) - .def("shape", - [](pd::Tensor& self) { return pd::vectorize(self.dims()); }); + .def("set", PyTensorSetFromArray) + .def("set", 
PyTensorSetFromArray) + .def("shape", [](Tensor &self) { return vectorize(self.dims()); }); - py::class_(m, "Variable", R"DOC(Variable Class. + py::class_(m, "Variable", R"DOC(Variable Class. All parameter, weight, gradient are variables in Paddle. )DOC") - .def("is_int", [](const pd::Variable& var) { return var.IsType(); }) + .def("is_int", [](const Variable &var) { return var.IsType(); }) .def("set_int", - [](pd::Variable& var, int val) -> void { - *var.GetMutable() = val; - }) - .def("get_int", - [](const pd::Variable& var) -> int { return var.Get(); }) + [](Variable &var, int val) -> void { *var.GetMutable() = val; }) + .def("get_int", [](const Variable &var) -> int { return var.Get(); }) .def("get_tensor", - [](pd::Variable& self) -> pd::Tensor* { - return self.GetMutable(); - }, + [](Variable &self) -> Tensor * { return self.GetMutable(); }, py::return_value_policy::reference) .def("get_net", - [](pd::Variable& self) -> pd::NetOp* { - return self.GetMutable(); - }, + [](Variable &self) -> NetOp * { return self.GetMutable(); }, py::return_value_policy::reference); - py::class_(m, "Scope", "") + py::class_(m, "Scope", "") .def("new_var", - [](pd::Scope& self, const std::string& name) -> pd::Variable* { + [](Scope &self, const std::string &name) -> Variable * { return self.NewVar(name); }, py::return_value_policy::reference) - .def("find_var", &pd::Scope::FindVar, py::return_value_policy::reference) + .def("find_var", &Scope::FindVar, py::return_value_policy::reference) .def(py::init<>()) - .def("new_scope", - [](pd::Scope& self) -> pd::Scope* { return &self.NewScope(); }, + .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, py::return_value_policy::reference) - .def("drop_kids", &pd::Scope::DropKids); + .def("drop_kids", &Scope::DropKids); //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. m.def("get_all_op_protos", []() -> std::vector { - auto& protos = pd::OpRegistry::protos(); + auto &protos = OpRegistry::protos(); std::vector ret_values; for (auto it = protos.begin(); it != protos.end(); ++it) { PADDLE_ENFORCE(it->second.IsInitialized(), @@ -134,47 +124,49 @@ All parameter, weight, gradient are variables in Paddle. 
m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") - .def("empty", pd::OperatorBase::EMPTY_VAR_NAME) - .def("temp", pd::OperatorBase::TMP_VAR_NAME); + .def("empty", OperatorBase::EMPTY_VAR_NAME) + .def("temp", OperatorBase::TMP_VAR_NAME); py::class_(m, "DeviceContext") - .def_static("cpu_context", []() -> paddle::platform::DeviceContext* { + .def_static("cpu_context", []() -> paddle::platform::DeviceContext * { return new paddle::platform::CPUDeviceContext(); }); - py::class_> operator_base( + py::class_> operator_base( m, "Operator"); operator_base.def_static("create", [](py::bytes protobin) { - pd::OpDesc desc; + OpDesc desc; PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), "Cannot parse user input to OpDesc"); PADDLE_ENFORCE(desc.IsInitialized(), "User OpDesc is not initialized, reason %s", desc.InitializationErrorString()); - return pd::OpRegistry::CreateOp(desc); + return OpRegistry::CreateOp(desc); }); ExposeOperator(operator_base); - py::class_> net(m, "Net"); + py::class_> net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> std::shared_ptr { + auto retv = std::make_shared(); retv->type_ = "plain_net"; return retv; }) - .def("add_op", &pd::NetOp::AddOp) + .def("add_op", &NetOp::AddOp) .def("add_op", - [](pd::NetOp& self, const std::shared_ptr& net) -> void { - self.AddOp(std::static_pointer_cast(net)); + [](NetOp &self, const std::shared_ptr &net) -> void { + self.AddOp(std::static_pointer_cast(net)); }) - .def("complete_add_op", &pd::NetOp::CompleteAddOp) + .def("complete_add_op", &NetOp::CompleteAddOp) .def("complete_add_op", - [](std::shared_ptr& self) { self->CompleteAddOp(); }); + [](std::shared_ptr &self) { self->CompleteAddOp(); }); ExposeOperator(net); m.def("unique_integer", UniqueIntegerGenerator); return m.ptr(); } +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 76070f636b..c3e9a914f1 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -26,19 +26,17 @@ limitations under the License. 
*/ #include "unsupported/Eigen/CXX11/Tensor" namespace paddle { -namespace pybind { -namespace details { // forward declare -template -struct CastToPyBufferImpl; -} // namespace details -} // namespace pybind namespace framework { +namespace details { +template +struct CastToPyBufferImpl; +} class Tensor { public: template - friend struct paddle::pybind::details::CastToPyBufferImpl; + friend struct details::CastToPyBufferImpl; template friend struct EigenTensor; diff --git a/paddle/pybind/tensor_bind.h b/paddle/framework/tensor_bind.h similarity index 93% rename from paddle/pybind/tensor_bind.h rename to paddle/framework/tensor_bind.h index 995e102bf9..530b640f70 100644 --- a/paddle/pybind/tensor_bind.h +++ b/paddle/framework/tensor_bind.h @@ -21,7 +21,7 @@ namespace py = pybind11; namespace paddle { -namespace pybind { +namespace framework { namespace details { @@ -59,11 +59,8 @@ struct CastToPyBufferImpl { return py::buffer_info( tensor.mutable_data(tensor.holder_->place()), - sizeof(CUR_TYPE), - py::format_descriptor::format(), - (size_t)framework::arity(tensor.dims()), - dims_outside, - strides); + sizeof(CUR_TYPE), py::format_descriptor::format(), + (size_t)framework::arity(tensor.dims()), dims_outside, strides); } else { constexpr bool less = I + 1 < std::tuple_size>::value; return CastToPyBufferImpl()(tensor); diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt deleted file mode 100644 index 845589dcb1..0000000000 --- a/paddle/pybind/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -cc_library(paddle_pybind SHARED - SRCS pybind.cc - DEPS pybind python - fc_op - sgd_op - add_op - mean_op - cross_entropy_op - recurrent_network_op) From 4dd89e875263f6526044bebb60b82bb97e7571a8 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 2 Aug 2017 19:42:30 +0800 Subject: [PATCH 08/25] change default option for MKLDNN and MKLML --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7d743e193..b174831109 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,8 +36,8 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) -option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." ${AVX_FOUND}) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." 
${AVX_FOUND}) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) From e6f62f79c3e783e3d8da4b76cb601a1590ea937e Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 2 Aug 2017 23:55:30 +0800 Subject: [PATCH 09/25] add meesage and cmake cache arg --- cmake/external/mkldnn.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 33988c66b2..25c6b4ef52 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -43,6 +43,7 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML") SET(MKLDNN_MKLROOT ${MKLML_ROOT}) SET(MKLDNN_IOMP_LIB ${MKLML_IOMP_LIB}) SET(MKLDNN_IOMP_DIR ${MKLML_LIB_DIR}) + MESSAGE(STATUS "Build MKLDNN with ${MKLDNN_MKLROOT}") ENDIF() ExternalProject_Add( @@ -56,6 +57,7 @@ ExternalProject_Add( CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} CMAKE_ARGS -DMKLROOT=${MKLDNN_MKLROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} + -DMKLROOT:PATH=${MKLDNN_MKLROOT} ) ADD_LIBRARY(mkldnn SHARED IMPORTED GLOBAL) From ab18947ea9bbc4989d784d7f247830985ba35e1b Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 2 Aug 2017 16:30:52 -0700 Subject: [PATCH 10/25] Simplify building precess of gradient operator --- paddle/framework/backward.cc | 14 +-- paddle/framework/backward_test.cc | 6 +- paddle/framework/grad_op_builder.cc | 146 +++++++++++++--------------- paddle/framework/grad_op_builder.h | 39 +------- paddle/framework/op_registry.h | 3 +- 5 files changed, 80 insertions(+), 128 deletions(-) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 0da11b91a7..ef68cf7abb 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -42,9 +42,9 @@ static std::shared_ptr NOP() { // // no_grad_names the gradient variable names without gradient calculating. // -// uniq_id is a unique index used inside recursively calling BackwardRecursive. -// use `uid = uniq_id++;` to get the unique index, and pass `uniq_id` through -// recursive calling. +// uniq_id is a unique index used inside recursively calling +// BackwardRecursive. use `uid = uniq_id++;` to get the unique index, and +// pass `uniq_id` through recursive calling. // // returns The backward operator. For simple situation, it is a simple // operator. For complex situation, it is a NetOp. @@ -64,8 +64,8 @@ std::shared_ptr BackwardRecursive( return NOP(); } - // All output gradients of forwarding operator do not need to calculate. Then - // all input gradients cannot be computed at all, and we put them into + // All output gradients of forwarding operator do not need to calculate. + // Then all input gradients cannot be computed at all, and we put them into // `no_grad_names` set. Return an NOP. if (AllInSet(forwardOp.outputs_, OperatorBase::GRAD_VAR_SUFFIX(), no_grad_names)) { @@ -83,8 +83,8 @@ std::shared_ptr BackwardRecursive( // Because forwardOp is a net op, it can static_cast. auto& forwardNet = static_cast(forwardOp); - // Map from output gradient variable name to operator's indices in backward - // net. That operator generates that variable. + // Map from output gradient variable name to operator's indices in + // backward net. That operator generates that variable. 
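// In other words, dup_output_ops is keyed by a gradient variable name and holds
// the indices (within the backward net) of every operator that produces that
// name, so duplicated gradient outputs can be detected and resolved after all
// step operators have been generated (a reading of the comment above, not new
// behaviour introduced here).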
std::unordered_map> dup_output_ops; size_t local_op_id = 0; diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index b095c2c3d5..81e0a14e8a 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -161,8 +161,8 @@ TEST(Backward, simple_op_grad) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); ASSERT_NE(fwd, nullptr); auto gop = f::OpRegistry::CreateGradOp(*fwd); - ASSERT_EQ(1UL, gop->inputs_.size()); - ASSERT_EQ("Out" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->inputs_[0]); + ASSERT_EQ(4UL, gop->inputs_.size()); + ASSERT_EQ(f::OperatorBase::EMPTY_VAR_NAME(), gop->inputs_[0]); ASSERT_EQ("rowwise_add_grad", gop->type_); ASSERT_EQ("X" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[0]); ASSERT_EQ("b" + f::OperatorBase::GRAD_VAR_SUFFIX(), gop->outputs_[1]); @@ -358,7 +358,7 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { 3UL /* external input number */ + 1UL /* external output number*/ + 1UL /* number of gradient of external output*/ - - 1UL /*ignoreGradient varable number*/ + //- 1UL /*ignoreGradient varable number*/ + 2U /* internal variable number*/); EXPECT_EQ(grad_fc.outputs_.size(), 2UL /* input number of mul*/ + 2UL /* input number of rowwise_add */ diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index dd686cc782..9f7856a79b 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -13,102 +13,92 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/framework/grad_op_builder.h" +#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_registry.h" namespace paddle { namespace framework { -OperatorBase* GradOpBuilder::Build() { - BuildOpInOutArgList(); - std::string grad_op_type = OpRegistry::grad_ops().at(op_.type_); - OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - grad_op->type_ = grad_op_type; - CompleteGradOp(grad_op); - return grad_op; -} +class OpRegistry; + +using VarIndexMap = std::unordered_map; -OpInOutArg* GradOpBuilder::BuildArg(const VarProto& var, - const VarIndexMap& var_map, - const std::vector& format, - InOutType type) { - int idx = var_map.at(var.name()); - int begin_idx = format.empty() ? idx : format.at(idx); - int end_idx = format.empty() ? idx + 1 : format.at(idx + 1); - return new OpInOutArg(var.name(), type, !var.ignore_gradient(), begin_idx, - end_idx); +enum OpArgType { IN, OUT }; + +static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { + std::string key = type == IN ? "input_format" : "output_name"; + return op->attrs_.count(key) + ? &boost::get>(op->attrs_.at(key)) + : nullptr; } -void GradOpBuilder::BuildOpInOutArgList() { - const OpProto& op_proto = OpRegistry::protos().at(op_.type_); - const auto& var_map = *(OpRegistry::VarIndexMaps().at(op_.type_)); - const std::vector& in_format = - op_.attrs_.count("input_format") - ? op_.GetAttr>("input_format") - : std::vector(); - const std::vector& out_format = - op_.attrs_.count("output_format") - ? 
op_.GetAttr>("output_format") - : std::vector(); - for (const auto& var : op_proto.inputs()) { - arg_list_.emplace_back( - std::shared_ptr(BuildArg(var, var_map, in_format, IN))); - } - for (const auto& var : op_proto.outputs()) { - arg_list_.emplace_back( - std::shared_ptr(BuildArg(var, var_map, out_format, OUT))); - } +static const std::vector* GetOpFormat(const OperatorBase* op, + const OpArgType& type) { + std::string key = type == IN ? "input_format" : "output_name"; + return op->attrs_.count(key) + ? &boost::get>(op->attrs_.at(key)) + : nullptr; } -void GradOpBuilder::AddArgIntoGradOp(const OpInOutArg* arg, - std::vector& in_out, - std::vector& format, - VarIndexMap* varmap, int& idx, - bool is_grad) const { - std::string var_name = arg->proto_name_; - if (is_grad) { - var_name += OperatorBase::GRAD_VAR_SUFFIX(); - } - (*varmap)[var_name] = idx++; - size_t pre_sz = in_out.size(); - auto base_it = arg->type_ == IN ? op_.inputs_.begin() : op_.outputs_.begin(); - std::copy(base_it + arg->begin_idx_, base_it + arg->end_idx_, - std::back_inserter(in_out)); - if (is_grad) { - for (size_t i = pre_sz; i < in_out.size(); ++i) { - in_out[i] += OperatorBase::GRAD_VAR_SUFFIX(); +static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, + const OpArgType& src_type, const OpArgType& dst_type, + int& idx, bool is_grad) { + const std::vector& src_inout = + src_type == IN ? src_op->inputs_ : src_op->outputs_; + const VarIndexMap& src_varmap = *src_op->in_out_idxs_; + const std::vector* src_format = GetOpFormat(src_op, src_type); + + std::vector& dst_inout = + dst_type == IN ? dst_op->inputs_ : dst_op->outputs_; + VarIndexMap& dst_varmap = *dst_op->in_out_idxs_; + std::vector* dst_format = GetOpFormat(dst_op, dst_type); + const OpProto& proto = OpRegistry::protos().at(src_op->type_); + const auto& src_arg_list = src_type == IN ? proto.inputs() : proto.outputs(); + + for (const auto& arg : src_arg_list) { + std::string src_name = arg.name(); + std::string dst_name = + is_grad ? src_name + OperatorBase::GRAD_VAR_SUFFIX() : src_name; + dst_varmap[dst_name] = idx++; + int src_arg_idx = src_varmap.at(src_name); + int src_begin = + src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx); + int src_end = src_format == nullptr ? src_arg_idx + 1 + : src_format->at(src_arg_idx + 1); + for (int i = src_begin; i < src_end; ++i) { + std::string s = is_grad ? src_inout[i] + OperatorBase::GRAD_VAR_SUFFIX() + : arg.ignore_gradient() + ? 
OperatorBase::EMPTY_VAR_NAME() + : src_inout[i]; + dst_inout.emplace_back(s); + } + if (dst_format != nullptr) { + dst_format->push_back(dst_inout.size()); } } - format.push_back(in_out.size()); } -void GradOpBuilder::CompleteGradOp(OperatorBase* grad_op) const { - grad_op->attrs_ = op_.attrs_; +OperatorBase* BuildGradOp(const OperatorBase* op) { + std::string grad_op_type = OpRegistry::grad_ops().at(op->type_); + OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); + grad_op->type_ = grad_op_type; + grad_op->attrs_ = op->attrs_; grad_op->attrs_.erase("input_format"); grad_op->attrs_.erase("output_format"); - VarIndexMap* grad_varmap = new VarIndexMap(); + if (GetOpFormat(op, OUT) != nullptr) { + grad_op->attrs_["output_format"] = std::vector({0}); + } + if (GetOpFormat(op, IN) != nullptr || GetOpFormat(op, OUT) != nullptr) { + grad_op->attrs_["input_format"] = std::vector({0}); + } + grad_op->in_out_idxs_.reset(new VarIndexMap()); int in_idx = 0; int out_idx = 0; - std::vector in_format({0}); - std::vector out_format({0}); - for (const auto& arg : arg_list_) { - // op_'s inputs_ and outputs_ - if (arg->needed_in_grad_) { - AddArgIntoGradOp(arg.get(), grad_op->inputs_, in_format, grad_varmap, - in_idx, false); - } - if (arg->type_ == IN) { - // gradients of op_'s inputs_ - AddArgIntoGradOp(arg.get(), grad_op->outputs_, out_format, grad_varmap, - out_idx, true); - } else { - // gradients of op_'s outputs_ - AddArgIntoGradOp(arg.get(), grad_op->inputs_, in_format, grad_varmap, - in_idx, true); - } - } - grad_op->attrs_["input_format"] = in_format; - grad_op->attrs_["output_format"] = out_format; - grad_op->in_out_idxs_.reset(grad_varmap); + TransOpArg(op, grad_op, IN, IN, in_idx, false); // I + TransOpArg(op, grad_op, OUT, IN, in_idx, false); // G + TransOpArg(op, grad_op, OUT, IN, in_idx, true); // OG + TransOpArg(op, grad_op, IN, OUT, out_idx, true); // IG + return grad_op; } } // namespace framework diff --git a/paddle/framework/grad_op_builder.h b/paddle/framework/grad_op_builder.h index cc7a76f372..cf235de6c2 100644 --- a/paddle/framework/grad_op_builder.h +++ b/paddle/framework/grad_op_builder.h @@ -1,48 +1,11 @@ #pragma once -#include "paddle/framework/op_proto.pb.h" #include "paddle/framework/operator.h" namespace paddle { namespace framework { -class OpRegistry; -enum InOutType { IN, OUT }; - -struct OpInOutArg { - OpInOutArg(const std::string& proto_name, const InOutType& type, - bool needed_in_grad, size_t begin_idx, size_t end_idx) - : proto_name_(proto_name), - type_(type), - needed_in_grad_(needed_in_grad), - begin_idx_(begin_idx), - end_idx_(end_idx) {} - - std::string proto_name_; - InOutType type_; - bool needed_in_grad_; - size_t begin_idx_; - size_t end_idx_; -}; - -class GradOpBuilder { - using VarIndexMap = std::unordered_map; - - public: - GradOpBuilder(const OperatorBase& op) : op_(op) {} - OperatorBase* Build(); - - private: - OpInOutArg* BuildArg(const VarProto& var, const VarIndexMap& var_map, - const std::vector& format, InOutType type); - void BuildOpInOutArgList(); - void AddArgIntoGradOp(const OpInOutArg* arg, std::vector& in_out, - std::vector& format, VarIndexMap* varmap, int& idx, - bool is_grad) const; - void CompleteGradOp(OperatorBase* grad_op) const; - const OperatorBase& op_; - std::vector> arg_list_; -}; +OperatorBase* BuildGradOp(const OperatorBase* op); } // namespace framework } // namespace paddle diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index f10c929798..7e70a83fa8 100644 --- 
a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -306,8 +306,7 @@ class OpRegistry { static std::shared_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); - GradOpBuilder builder(op); - std::shared_ptr grad_op(builder.Build()); + std::shared_ptr grad_op(BuildGradOp(&op)); grad_op->Init(); return grad_op; } From 5e37872462c7dfec33f8da80335520a645beb1b8 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 2 Aug 2017 16:56:40 -0700 Subject: [PATCH 11/25] Refine code --- paddle/framework/grad_op_builder.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 9f7856a79b..afb8a2cfe1 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -45,12 +45,10 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, int& idx, bool is_grad) { const std::vector& src_inout = src_type == IN ? src_op->inputs_ : src_op->outputs_; - const VarIndexMap& src_varmap = *src_op->in_out_idxs_; const std::vector* src_format = GetOpFormat(src_op, src_type); std::vector& dst_inout = dst_type == IN ? dst_op->inputs_ : dst_op->outputs_; - VarIndexMap& dst_varmap = *dst_op->in_out_idxs_; std::vector* dst_format = GetOpFormat(dst_op, dst_type); const OpProto& proto = OpRegistry::protos().at(src_op->type_); const auto& src_arg_list = src_type == IN ? proto.inputs() : proto.outputs(); @@ -59,8 +57,8 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, std::string src_name = arg.name(); std::string dst_name = is_grad ? src_name + OperatorBase::GRAD_VAR_SUFFIX() : src_name; - dst_varmap[dst_name] = idx++; - int src_arg_idx = src_varmap.at(src_name); + (*dst_op->in_out_idxs_)[dst_name] = idx++; + int src_arg_idx = src_op->in_out_idxs_->at(src_name); int src_begin = src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx); int src_end = src_format == nullptr ? 
src_arg_idx + 1 From 8162ecd284102073ebdac6597e897a301dd67668 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 2 Aug 2017 17:39:46 -0700 Subject: [PATCH 12/25] Move paddle/framework/detail/tensor-inl.h into framework/tensor_impl.h --- paddle/framework/tensor.h | 2 +- paddle/framework/{detail/tensor-inl.h => tensor_impl.h} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename paddle/framework/{detail/tensor-inl.h => tensor_impl.h} (100%) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 76070f636b..7854e6f0d5 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -167,4 +167,4 @@ class Tensor { } // namespace framework } // namespace paddle -#include "paddle/framework/detail/tensor-inl.h" +#include "paddle/framework/detail/tensor_impl.h" diff --git a/paddle/framework/detail/tensor-inl.h b/paddle/framework/tensor_impl.h similarity index 100% rename from paddle/framework/detail/tensor-inl.h rename to paddle/framework/tensor_impl.h From 5a75d103690924ee80bc4ea2abfd117f2f570947 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Wed, 2 Aug 2017 17:58:48 -0700 Subject: [PATCH 13/25] Correct path --- paddle/framework/tensor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 7854e6f0d5..85af0e20a4 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -167,4 +167,4 @@ class Tensor { } // namespace framework } // namespace paddle -#include "paddle/framework/detail/tensor_impl.h" +#include "paddle/framework/tensor_impl.h" From 26ab4538015662130029f715ef7764c16df86cbe Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 2 Aug 2017 19:34:11 -0700 Subject: [PATCH 14/25] enum ==> enum class --- paddle/framework/backward_test.cc | 1 - paddle/framework/grad_op_builder.cc | 32 +++++++++++++++-------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index 81e0a14e8a..2259735840 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -358,7 +358,6 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { 3UL /* external input number */ + 1UL /* external output number*/ + 1UL /* number of gradient of external output*/ - //- 1UL /*ignoreGradient varable number*/ + 2U /* internal variable number*/); EXPECT_EQ(grad_fc.outputs_.size(), 2UL /* input number of mul*/ + 2UL /* input number of rowwise_add */ diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index afb8a2cfe1..34722fedf9 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -8,9 +8,9 @@ You may obtain a copy of the License at Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ +WITHOpArgType::OUT WARRANTIES OR CONDITIONS OF ANY KOpArgType::IND, either +express or implied. See the License for the specific language governing +permissions and limitations under the License. 
*/ #include "paddle/framework/grad_op_builder.h" #include "paddle/framework/op_proto.pb.h" @@ -23,10 +23,10 @@ class OpRegistry; using VarIndexMap = std::unordered_map; -enum OpArgType { IN, OUT }; +enum class OpArgType { IN, OUT }; static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { - std::string key = type == IN ? "input_format" : "output_name"; + std::string key = type == OpArgType::IN ? "input_format" : "output_name"; return op->attrs_.count(key) ? &boost::get>(op->attrs_.at(key)) : nullptr; @@ -34,7 +34,7 @@ static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { static const std::vector* GetOpFormat(const OperatorBase* op, const OpArgType& type) { - std::string key = type == IN ? "input_format" : "output_name"; + std::string key = type == OpArgType::IN ? "input_format" : "output_name"; return op->attrs_.count(key) ? &boost::get>(op->attrs_.at(key)) : nullptr; @@ -44,14 +44,15 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, const OpArgType& src_type, const OpArgType& dst_type, int& idx, bool is_grad) { const std::vector& src_inout = - src_type == IN ? src_op->inputs_ : src_op->outputs_; + src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_; const std::vector* src_format = GetOpFormat(src_op, src_type); std::vector& dst_inout = - dst_type == IN ? dst_op->inputs_ : dst_op->outputs_; + dst_type == OpArgType::IN ? dst_op->inputs_ : dst_op->outputs_; std::vector* dst_format = GetOpFormat(dst_op, dst_type); const OpProto& proto = OpRegistry::protos().at(src_op->type_); - const auto& src_arg_list = src_type == IN ? proto.inputs() : proto.outputs(); + const auto& src_arg_list = + src_type == OpArgType::IN ? proto.inputs() : proto.outputs(); for (const auto& arg : src_arg_list) { std::string src_name = arg.name(); @@ -83,19 +84,20 @@ OperatorBase* BuildGradOp(const OperatorBase* op) { grad_op->attrs_ = op->attrs_; grad_op->attrs_.erase("input_format"); grad_op->attrs_.erase("output_format"); - if (GetOpFormat(op, OUT) != nullptr) { + if (GetOpFormat(op, OpArgType::OUT) != nullptr) { grad_op->attrs_["output_format"] = std::vector({0}); } - if (GetOpFormat(op, IN) != nullptr || GetOpFormat(op, OUT) != nullptr) { + if (GetOpFormat(op, OpArgType::IN) != nullptr || + GetOpFormat(op, OpArgType::OUT) != nullptr) { grad_op->attrs_["input_format"] = std::vector({0}); } grad_op->in_out_idxs_.reset(new VarIndexMap()); int in_idx = 0; int out_idx = 0; - TransOpArg(op, grad_op, IN, IN, in_idx, false); // I - TransOpArg(op, grad_op, OUT, IN, in_idx, false); // G - TransOpArg(op, grad_op, OUT, IN, in_idx, true); // OG - TransOpArg(op, grad_op, IN, OUT, out_idx, true); // IG + TransOpArg(op, grad_op, OpArgType::IN, OpArgType::IN, in_idx, false); // I + TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, false); // G + TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, true); // OG + TransOpArg(op, grad_op, OpArgType::IN, OpArgType::OUT, out_idx, true); // IG return grad_op; } From 858b97cbea7e04f5f51a35b00deffbf3900b015b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 3 Aug 2017 14:49:56 +0800 Subject: [PATCH 15/25] Fix CPU compile --- paddle/math/BaseMatrix.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/math/BaseMatrix.cu b/paddle/math/BaseMatrix.cu index 6db5965789..ba2b47d6cc 100644 --- a/paddle/math/BaseMatrix.cu +++ b/paddle/math/BaseMatrix.cu @@ -442,7 +442,8 @@ DEFINE_MATRIX_UNARY_PARAMETER_OP(Clip, TWO_PARAMETER, template void BaseMatrixT::clip(T p1, T p2) { 
applyUnary(unary::Clip(p1, p2)); } -DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER, a = b < p1 ? 0 : (b > p2 ? 0 : 1)); +DEFINE_MATRIX_BINARY_PARAMETER_OP(ClipDerivative, TWO_PARAMETER, + a = b < p1 ? 0 : (b > p2 ? 0 : 1)); template void BaseMatrixT::clipDerivative(BaseMatrixT& b, T p1, T p2) { applyBinary(binary::ClipDerivative(p1, p2), b); From 57f57ead32e8f0fe4b89c094ae17fb2926336047 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 3 Aug 2017 14:58:39 +0800 Subject: [PATCH 16/25] Add @EMPTY@ to default no_grad_vars in `Backward` --- paddle/framework/backward.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 0da11b91a7..e784bb2b7d 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -168,6 +168,9 @@ std::shared_ptr Backward( std::unordered_set no_grad_names; no_grad_names.reserve(no_grad_vars.size()); + no_grad_names.insert(OperatorBase::EMPTY_VAR_NAME() + + OperatorBase::GRAD_VAR_SUFFIX()); + for (auto& name : no_grad_vars) { no_grad_names.insert(name + OperatorBase::GRAD_VAR_SUFFIX()); } From fa66cc77ff58f6e4cc6a62cf5c3cff1ab648ed68 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 3 Aug 2017 15:27:30 +0800 Subject: [PATCH 17/25] Mean Op Merge Develop --- paddle/operators/mean_op.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/operators/mean_op.h b/paddle/operators/mean_op.h index a89cb422f9..e712dee6a7 100644 --- a/paddle/operators/mean_op.h +++ b/paddle/operators/mean_op.h @@ -47,7 +47,7 @@ public: T ig_size = (T)framework::product(IG->dims()); - EigenVector::Flatten(*IG).device(*(context.GetEigenDevice())) = + EigenVector::Flatten(*IG).device(context.GetEigenDevice()) = EigenScalar::From(*OG) / ig_size; } }; From 26f43a8c2a848b2cd5067c4de03c32fd9f640da3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 3 Aug 2017 15:50:22 +0800 Subject: [PATCH 18/25] Faster build --- paddle/scripts/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 69ae0ea2d7..5b176f8c92 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -69,7 +69,7 @@ cat < Date: Thu, 3 Aug 2017 16:49:06 +0800 Subject: [PATCH 19/25] Parallel CPack --- paddle/scripts/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 5b176f8c92..8de0e608c1 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -122,7 +122,7 @@ cat < Date: Thu, 3 Aug 2017 20:34:40 +0800 Subject: [PATCH 20/25] Merge Develop --- paddle/framework/CMakeLists.txt | 2 +- paddle/framework/pybind.cc | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index f8f9bae12d..cbf950d54b 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -44,4 +44,4 @@ cc_library(paddle_pybind SHARED add_op mean_op cross_entropy_op - recurrent_network_op) + recurrent_op) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index cc47469b4d..e9b83f4007 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -83,29 +83,28 @@ PYBIND11_PLUGIN(core) { self.Resize(make_ddim(dim)); }) .def("alloc_float", - [](pd::Tensor &self, paddle::platform::GPUPlace &place) { + [](Tensor &self, paddle::platform::GPUPlace &place) { 
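// alloc_float allocates the tensor's buffer on the given place before any data
// is copied in; Python callers are expected to invoke alloc_float(place) and
// then set(...), as the softmax_grad test later in this series does.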
self.mutable_data(place); }) .def("alloc_float", - [](pd::Tensor &self, paddle::platform::CPUPlace &place) { + [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("alloc_int", - [](pd::Tensor &self, paddle::platform::CPUPlace &place) { + [](Tensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("alloc_int", - [](pd::Tensor &self, paddle::platform::GPUPlace &place) { + [](Tensor &self, paddle::platform::GPUPlace &place) { self.mutable_data(place); }) - .def("set", paddle::pybind::PyCPUTensorSetFromArray) - .def("set", paddle::pybind::PyCPUTensorSetFromArray) + .def("set", PyCPUTensorSetFromArray) + .def("set", PyCPUTensorSetFromArray) #ifndef PADDLE_ONLY_CPU - .def("set", paddle::pybind::PyCUDATensorSetFromArray) - .def("set", paddle::pybind::PyCUDATensorSetFromArray) + .def("set", PyCUDATensorSetFromArray) + .def("set", PyCUDATensorSetFromArray) #endif - .def("shape", - [](pd::Tensor &self) { return pd::vectorize(self.dims()); }); + .def("shape", [](Tensor &self) { return vectorize(self.dims()); }); py::class_(m, "Variable", R"DOC(Variable Class. @@ -152,8 +151,8 @@ All parameter, weight, gradient are variables in Paddle. m.def_submodule( "var_names", "The module will return special predefined variable name in Paddle") - .def("empty", pd::OperatorBase::EMPTY_VAR_NAME) - .def("temp", pd::OperatorBase::TMP_VAR_NAME); + .def("empty", OperatorBase::EMPTY_VAR_NAME) + .def("temp", OperatorBase::TMP_VAR_NAME); // clang-format off py::class_(m, "DeviceContext") .def_static("create", @@ -190,9 +189,9 @@ All parameter, weight, gradient are variables in Paddle. }); operator_base.def("backward", - [](const pd::OperatorBase &forwardOp, + [](const OperatorBase &forwardOp, const std::unordered_set &no_grad_vars) { - return pd::Backward(forwardOp, no_grad_vars); + return Backward(forwardOp, no_grad_vars); }); ExposeOperator(operator_base); From fe5bca49b89085fcf087ced9ec9a9b802bb56ae3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 3 Aug 2017 20:35:41 +0800 Subject: [PATCH 21/25] Change `tensor_bind.h` -> `tensor_py.h` --- paddle/framework/pybind.cc | 2 +- paddle/framework/{tensor_bind.h => tensor_py.h} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename paddle/framework/{tensor_bind.h => tensor_py.h} (100%) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index e9b83f4007..1837591e98 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" -#include "paddle/framework/tensor_bind.h" +#include "paddle/framework/tensor_py.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "pybind11/numpy.h" diff --git a/paddle/framework/tensor_bind.h b/paddle/framework/tensor_py.h similarity index 100% rename from paddle/framework/tensor_bind.h rename to paddle/framework/tensor_py.h From d953611e865cabecbcf967028c3bc78bc34f8f53 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Thu, 3 Aug 2017 23:06:12 +0800 Subject: [PATCH 22/25] Softmax grad op (#3164) * init softmax grad op * add compute code * export Backward to python * update test ,export op.type to python * update python test, fix compute bug * update unit test * use eigen * optimize eigen code * add gpu test * register softmax_grad GPU kernel and fix test bug * typo * follow comments --- paddle/framework/operator.h | 4 ++ paddle/operators/softmax_op.cc | 49 ++++++++------ paddle/operators/softmax_op.cu | 1 + paddle/operators/softmax_op.h | 58 +++++++++++++---- paddle/operators/type_alias.h | 1 + .../v2/framework/tests/test_softmax_op.py | 64 ++++++++++++++++++- 6 files changed, 147 insertions(+), 30 deletions(-) diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 5543510348..0b58829716 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -55,6 +55,10 @@ class OperatorBase { /// e.g. Variable "x@GRAD" is the gradient of varibale "x". static std::string GRAD_VAR_SUFFIX() { return "@GRAD"; } + static std::string GRAD_VAR_NAME(const std::string& name) { + return name + GRAD_VAR_SUFFIX(); + } + /// Variables with this suffix are supposed to be filled up with zeros. static std::string ZERO_VAR_SUFFIX() { return "@ZERO"; } diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 5b59fad7d5..5cbb96ab75 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -1,16 +1,17 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ #include "paddle/operators/softmax_op.h" namespace paddle { @@ -19,12 +20,13 @@ namespace operators { class SoftmaxOp : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 1, "Only one input is need for softmax"); - PADDLE_ENFORCE(ctx.Input(0)->dims().size() == 2, + PADDLE_ENFORCE(ctx.InputSize() == 1UL, + "Only one input is need for softmax"); + PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, "The input of softmax op must be matrix"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, + PADDLE_ENFORCE(ctx.OutputSize() == 1UL, "Only one output is need for softmax"); - ctx.Output(0)->Resize(ctx.Input(0)->dims()); + ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -40,10 +42,19 @@ public: class SoftmaxOpGrad : public OperatorWithKernel { protected: - void InferShape(const InferShapeContext &ctx) const override {} - std::string DebugString() const override { - LOG(INFO) << "SoftmaxOpGrad"; - return ""; + void InferShape(const InferShapeContext &ctx) const override { + PADDLE_ENFORCE(ctx.InputSize() == 3UL, + "Input of SoftmaxOpGrad should be 3, X, Y, YG"); + PADDLE_ENFORCE(ctx.OutputSize() == 1UL, + "Output of SoftmaxOpGrad should be 1"); + PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); + PADDLE_ENFORCE(ctx.InputVar(GRAD_VAR_NAME("Y")) != nullptr, + "Input(Y@GRAD) should not be null"); + PADDLE_ENFORCE(ctx.Input("Y")->dims() == + ctx.Input(GRAD_VAR_NAME("Y"))->dims(), + "the shape of Input(0) and Input(1) should be the same"); + ctx.Output(GRAD_VAR_NAME("X")) + ->Resize(ctx.Input("Y")->dims()); } }; @@ -51,5 +62,7 @@ protected: } // namespace paddle REGISTER_OP(softmax, ops::SoftmaxOp, ops::SoftmaxOpMaker); -REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); REGISTER_OP_CPU_KERNEL(softmax, ops::SoftmaxKernel); +REGISTER_GRADIENT_OP(softmax, softmax_grad, ops::SoftmaxOpGrad); +REGISTER_OP_CPU_KERNEL(softmax_grad, + ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.cu b/paddle/operators/softmax_op.cu index ddf8f6e913..8c652213f2 100644 --- a/paddle/operators/softmax_op.cu +++ b/paddle/operators/softmax_op.cu @@ -3,3 +3,4 @@ #include "paddle/operators/softmax_op.h" REGISTER_OP_GPU_KERNEL(softmax, ops::SoftmaxKernel); +REGISTER_OP_GPU_KERNEL(softmax_grad, ops::SoftmaxGradKernel); diff --git a/paddle/operators/softmax_op.h b/paddle/operators/softmax_op.h index 75c5197697..13e74a7907 100644 --- a/paddle/operators/softmax_op.h +++ b/paddle/operators/softmax_op.h @@ -1,19 +1,22 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
*/ +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ #pragma once +#include "paddle/framework/ddim.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/tensor.h" #include "paddle/operators/type_alias.h" namespace paddle { @@ -23,8 +26,8 @@ template class SoftmaxKernel : public OpKernel { public: void Compute(const ExecutionContext& context) const override { - auto input = context.Input(0); - auto output = context.Output(0); + auto input = context.Input("X"); + auto output = context.Output("Y"); output->mutable_data(context.GetPlace()); auto logits = EigenMatrix::From(*input); @@ -57,5 +60,38 @@ public: .broadcast(one_by_class)); } }; + +template +class SoftmaxGradKernel : public OpKernel { +public: + void Compute(const ExecutionContext& context) const override { + std::shared_ptr scale_ = std::make_shared(); + + auto Y = context.Input("Y"); + auto dY = context.Input(OperatorBase::GRAD_VAR_NAME("Y")); + auto dX = context.Output(OperatorBase::GRAD_VAR_NAME("X")); + dX->mutable_data(context.GetPlace()); + + const int batch_size = Y->dims()[0]; + const int class_num = Y->dims()[1]; + + Eigen::DSizes along_class(1); + Eigen::DSizes batch_by_one(batch_size, 1); + Eigen::DSizes one_by_class(1, class_num); + + auto Y_eigen = EigenMatrix::From(*Y); + auto dY_eigen = EigenMatrix::From(*dY); + auto dX_eigen = EigenMatrix::From(*dX); + auto place = context.GetEigenDevice(); + + auto dot = (Y_eigen * dY_eigen) + .sum(along_class) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class); + dX_eigen.device(place) = (dY_eigen - dot) * Y_eigen; + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h index 9049ffda1d..4ee08a099d 100644 --- a/paddle/operators/type_alias.h +++ b/paddle/operators/type_alias.h @@ -22,6 +22,7 @@ namespace paddle { namespace operators { using OpKernel = framework::OpKernel; +using OperatorBase = framework::OperatorBase; using InferShapeContext = framework::InferShapeContext; using ExecutionContext = framework::ExecutionContext; using Variable = framework::Variable; diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index 191b698c1c..c808881287 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -1,6 +1,10 @@ import unittest -from op_test_util import OpTestMeta + import numpy as np +import paddle.v2.framework.core as core +import paddle.v2.framework.create_op_creation_methods as creation + +from op_test_util import OpTestMeta def stable_softmax(x): @@ -19,5 +23,63 @@ class TestSoftmaxOp(unittest.TestCase): self.Y = np.apply_along_axis(stable_softmax, 1, self.X) +class TestSoftmaxGradOp(unittest.TestCase): + def test_softmax_grad(self): + op = creation.op_creations.softmax(X="X", Y="Y") + backward_op = core.Operator.backward(op, set()) + self.assertEqual(backward_op.type(), "softmax_grad") + expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' + self.assertEqual(expected, str(backward_op)) + + batch_size = 3 + class_num = 5 + # Initialize X and add 1e-2 for numerical stability + Y = np.random.rand(batch_size, class_num).astype(np.float32) + Y = Y + 1e-2 + dY = 
np.random.rand(batch_size, class_num).astype(np.float32) + + # Reference implementation of cross entropy with soft labels + def label_softmax_grad(Y, dY): + dX = Y * 0.0 + for i in range(batch_size): + d = np.dot(Y[i, :], dY[i, :]) + dX[i, :] = Y[i, :] * (dY[i, :] - d) + return dX + + expected = label_softmax_grad(Y, dY) + + scope = core.Scope() + places = [] + places.append(core.CPUPlace()) + if core.is_compile_gpu(): + places.append(core.GPUPlace(0)) + + for place in places: + y = scope.new_var("Y") + y_tensor = y.get_tensor() + y_tensor.set_dims([batch_size, class_num]) + y_tensor.alloc_float(place) + y_tensor.set(Y, place) + + dy = scope.new_var("Y@GRAD") + dy_tensor = dy.get_tensor() + dy_tensor.set_dims([batch_size, class_num]) + dy_tensor.alloc_float(place) + dy_tensor.set(dY, place) + + x = scope.new_var("X") + dx = scope.new_var("X@GRAD") + + tensor = scope.find_var("X@GRAD").get_tensor() + backward_op.infer_shape(scope) + self.assertEqual([batch_size, class_num], tensor.shape()) + + ctx = core.DeviceContext.create(place) + backward_op.run(scope, ctx) + actual = np.array(tensor) + + np.testing.assert_almost_equal(actual, expected, decimal=3) + + if __name__ == '__main__': unittest.main() From 0bd49a50ce465082e8938fd3241969c3a6dd7b46 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Thu, 3 Aug 2017 08:14:11 -0700 Subject: [PATCH 23/25] move net_op to operators/ (#3201) * move net_op to operators --- paddle/framework/CMakeLists.txt | 5 +-- paddle/framework/backward.cc | 8 ++--- paddle/framework/backward_test.cc | 32 ++++++++++--------- paddle/framework/pybind.cc | 29 ++++++++++------- paddle/operators/CMakeLists.txt | 7 ++-- .../{framework/net.cc => operators/net_op.cc} | 6 ++-- .../{framework/net.h => operators/net_op.h} | 20 ++++++------ .../net_op_design.md} | 0 .../{framework => operators}/net_op_test.cc | 18 ++++++----- paddle/operators/recurrent_op.cc | 2 +- paddle/operators/recurrent_op_test.cc | 5 +-- paddle/operators/type_alias.h | 7 ++-- 12 files changed, 76 insertions(+), 63 deletions(-) rename paddle/{framework/net.cc => operators/net_op.cc} (96%) rename paddle/{framework/net.h => operators/net_op.h} (89%) rename paddle/{framework/net_design.md => operators/net_op_design.md} (100%) rename paddle/{framework => operators}/net_op_test.cc (91%) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index cbf950d54b..9c39430835 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -31,10 +31,7 @@ py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc. 
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) -cc_library(net SRCS net.cc DEPS op_registry) -cc_test(net_op_test SRCS net_op_test.cc DEPS net) - -cc_library(backward SRCS backward.cc DEPS net) +cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) cc_library(paddle_pybind SHARED SRCS pybind.cc diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index e784bb2b7d..9730fdd18b 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -14,8 +14,8 @@ #include "paddle/framework/backward.h" #include -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" namespace paddle { namespace framework { @@ -32,7 +32,7 @@ static bool AllInSet(const std::vector& names, } static std::shared_ptr NOP() { - auto net_op = std::make_shared(); + auto net_op = std::make_shared(); net_op->type_ = "@NOP@"; net_op->CompleteAddOp(); return net_op; @@ -77,11 +77,11 @@ std::shared_ptr BackwardRecursive( } // Returned gradient network - auto net = std::make_shared(); + auto net = std::make_shared(); if (forwardOp.IsNetOp()) { // Because forwardOp is a net op, it can static_cast. - auto& forwardNet = static_cast(forwardOp); + auto& forwardNet = static_cast(forwardOp); // Map from output gradient variable name to operator's indices in backward // net. That operator generates that variable. diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc index b095c2c3d5..8adf7e4365 100644 --- a/paddle/framework/backward_test.cc +++ b/paddle/framework/backward_test.cc @@ -15,8 +15,9 @@ #include "paddle/framework/backward.h" #include -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace framework { @@ -70,7 +71,7 @@ class NoGradOpMaker : public OpProtoAndCheckerMaker { } }; -class FcOp : public NetOp { +class FcOp : public ops::NetOp { public: void Init() override { AddOp(OpRegistry::CreateOp("mul", {Input("X"), Input("W")}, @@ -182,7 +183,8 @@ TEST(Backward, simple_op_not_need_grad) { auto no_input_gop = f::Backward(*fwd, {"X", "b"}); ASSERT_NE(no_input_gop, nullptr); ASSERT_TRUE(no_input_gop->IsNetOp()); - ASSERT_EQ(0UL, std::static_pointer_cast(no_input_gop)->ops_.size()); + ASSERT_EQ(0UL, + std::static_pointer_cast(no_input_gop)->ops_.size()); } TEST(Backward, net_fc_backward_normal) { @@ -191,7 +193,7 @@ TEST(Backward, net_fc_backward_normal) { ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, {}); ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); + auto net = static_cast(gop.get()); ASSERT_NO_THROW(net->DebugString()); @@ -214,7 +216,7 @@ TEST(Backward, net_fc_backward_not_have_b) { ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, {}); ASSERT_TRUE(gop->IsNetOp()); - auto net = static_cast(gop.get()); + auto net = static_cast(gop.get()); ASSERT_NO_THROW(net->DebugString()); @@ -228,7 +230,7 @@ TEST(Backward, net_fc_backward_not_have_b) { } TEST(Backward, net_input_of_network_not_need_grad) { - f::NetOp net; + ops::NetOp net; net.AddOp(f::OpRegistry::CreateOp("fc", {"X", "W1", "b1"}, {"mul_tmp_0", "add_tmp_0", "hidden0"}, {})); net.AddOp(f::OpRegistry::CreateOp("fc", {"hidden0", "W2", "b2"}, @@ -236,7 +238,7 @@ TEST(Backward, net_input_of_network_not_need_grad) { 
net.CompleteAddOp(); auto bwd = Backward(net, {"X"}); // X@GRAD is not need. ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); + auto bwd_net = static_cast(bwd.get()); std::unordered_set all_output = std::unordered_set( bwd_net->outputs_.begin(), bwd_net->outputs_.end()); @@ -253,7 +255,7 @@ TEST(Backward, net_input_of_network_not_need_grad) { ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); - auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); + auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ( f::OperatorBase::EMPTY_VAR_NAME(), @@ -261,14 +263,14 @@ TEST(Backward, net_input_of_network_not_need_grad) { } TEST(Backward, net_shared_weight) { - f::NetOp net; + ops::NetOp net; net.AddOp(f::OpRegistry::CreateOp("mul", {"X", "W"}, {"Out"}, {})); net.AddOp(f::OpRegistry::CreateOp("mul", {"Out", "W"}, {"FinalOut"}, {})); net.CompleteAddOp(); auto bwd = f::Backward(net, {}); ASSERT_TRUE(bwd->IsNetOp()); - auto bwd_net = static_cast(bwd.get()); + auto bwd_net = static_cast(bwd.get()); ASSERT_EQ(3UL, bwd_net->ops_.size()); ASSERT_EQ("add", bwd_net->ops_[2]->type_); } @@ -285,7 +287,7 @@ TEST(Backward, op_all_input_are_not_need) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); auto backward = f::Backward(*fwd, {"X", "b"}); ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); + auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } @@ -293,7 +295,7 @@ TEST(Backward, op_all_output_are_not_need) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {"X", "b"}, {"Out"}, {}); auto backward = f::Backward(*fwd, {"Out"}); ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); + auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } @@ -301,7 +303,7 @@ TEST(Backward, op_part_of_output_are_not_need) { auto fwd = f::OpRegistry::CreateOp("many_output_op", {"X"}, {"Y", "Z"}, {}); auto backward = f::Backward(*fwd, {"Z"}); ASSERT_TRUE(backward->IsNetOp()); - auto net = static_cast(backward.get()); + auto net = static_cast(backward.get()); ASSERT_EQ(net->ops_.size(), 2UL); auto &fill_zero = *net->ops_[0]; @@ -341,7 +343,7 @@ TEST(Backward, op_part_of_input_are_not_need) { } TEST(Backward, linear_net_intermediate_variable_has_no_grad) { - f::NetOp net; + ops::NetOp net; net.AddOp(f::OpRegistry::CreateOp("fc", {"x1", "w1", "b1"}, {"mul_out1", "add_out1", "out1"}, {})); net.AddOp(f::OpRegistry::CreateOp("fc", {"out1", "w2", "b2"}, @@ -351,7 +353,7 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { net.CompleteAddOp(); auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"}); ASSERT_TRUE(backward->IsNetOp()); - auto bwd_net = static_cast(backward.get()); + auto bwd_net = static_cast(backward.get()); ASSERT_EQ(bwd_net->ops_.size(), 3UL); auto &grad_fc = *bwd_net->ops_[0]; EXPECT_EQ(grad_fc.inputs_.size(), diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index 1837591e98..b4f0f3ef7e 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -17,11 +17,12 @@ limitations under the License. 
*/ #include #include "paddle/framework/backward.h" -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/scope.h" #include "paddle/framework/tensor_py.h" +#include "paddle/operators/net_op.h" +#include "paddle/operators/type_alias.h" #include "paddle/platform/enforce.h" #include "paddle/platform/place.h" #include "pybind11/numpy.h" @@ -118,7 +119,9 @@ All parameter, weight, gradient are variables in Paddle. [](Variable &self) -> Tensor * { return self.GetMutable(); }, py::return_value_policy::reference) .def("get_net", - [](Variable &self) -> NetOp * { return self.GetMutable(); }, + [](Variable &self) -> ops::NetOp * { + return self.GetMutable(); + }, py::return_value_policy::reference); py::class_(m, "Scope", "") @@ -196,22 +199,24 @@ All parameter, weight, gradient are variables in Paddle. ExposeOperator(operator_base); - py::class_> net(m, "Net"); + py::class_> net(m, "Net"); net.def_static("create", - []() -> std::shared_ptr { - auto retv = std::make_shared(); + []() -> std::shared_ptr { + auto retv = std::make_shared(); retv->type_ = "plain_net"; return retv; }) - .def("add_op", &NetOp::AddOp) - .def("add_op", - [](NetOp &self, const std::shared_ptr &net) -> void { - self.AddOp(std::static_pointer_cast(net)); - }) - .def("complete_add_op", &NetOp::CompleteAddOp) + .def("add_op", &ops::NetOp::AddOp) + .def( + "add_op", + [](ops::NetOp &self, const std::shared_ptr &net) -> void { + self.AddOp(std::static_pointer_cast(net)); + }) + .def("complete_add_op", &ops::NetOp::CompleteAddOp) .def("complete_add_op", - [](std::shared_ptr &self) { self->CompleteAddOp(); }); + [](std::shared_ptr &self) { self->CompleteAddOp(); }); + ExposeOperator(net); m.def("unique_integer", UniqueIntegerGenerator); diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 6465deeec9..96c76e22e9 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,6 +41,9 @@ function(op_library TARGET) endif() endfunction() +cc_library(net_op SRCS net_op.cc DEPS op_registry) +cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) + op_library(add_op SRCS add_op.cc add_op.cu) cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) @@ -59,6 +62,6 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(fc_op SRCS fc_op.cc - DEPS mul_op rowwise_add_op sigmoid_op softmax_op net) -op_library(recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net) + DEPS mul_op rowwise_add_op sigmoid_op softmax_op net_op) +op_library(recurrent_op SRCS recurrent_op.cc DEPS op_desc tensor op_registry operator net_op) cc_test(recurrent_op_test SRCS recurrent_op_test.cc DEPS recurrent_op gtest mul_op add_op) diff --git a/paddle/framework/net.cc b/paddle/operators/net_op.cc similarity index 96% rename from paddle/framework/net.cc rename to paddle/operators/net_op.cc index 2cd378c6b2..fbc98e0992 100644 --- a/paddle/framework/net.cc +++ b/paddle/operators/net_op.cc @@ -14,11 +14,11 @@ limitations under the License. 
*/ -#include "paddle/framework/net.h" +#include "paddle/operators/net_op.h" #include "paddle/framework/op_registry.h" namespace paddle { -namespace framework { +namespace operators { void NetOp::CompleteAddOp(bool calc) { add_op_done_ = true; @@ -74,5 +74,5 @@ std::string NetOp::DebugString() const { bool NetOp::IsNetOp() const { return true; } -} // namespace framework +} // namespace operators } // namespace paddle diff --git a/paddle/framework/net.h b/paddle/operators/net_op.h similarity index 89% rename from paddle/framework/net.h rename to paddle/operators/net_op.h index acf1a69da9..13611e1ee8 100644 --- a/paddle/framework/net.h +++ b/paddle/operators/net_op.h @@ -14,15 +14,17 @@ limitations under the License. */ #pragma once -#include -#include +#include "paddle/framework/op_desc.pb.h" #include "paddle/framework/op_proto.pb.h" #include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" #include "paddle/framework/scope.h" +#include "paddle/operators/type_alias.h" #include "paddle/platform/device_context.h" namespace paddle { -namespace framework { +namespace operators { + /** * @brief Network is also a type of Operator * @@ -37,13 +39,13 @@ namespace framework { * This is the base class of network, all the networks should implement the APIs * it defines. */ -class NetOp : public OperatorBase { - public: +class NetOp : public framework::OperatorBase { +public: /** * Infer all the operators' input and output variables' shapes, will be called * before every mini-batch */ - void InferShape(const Scope& scope) const override { + void InferShape(const framework::Scope& scope) const override { for (auto& op : ops_) { op->InferShape(scope); } @@ -56,7 +58,7 @@ class NetOp : public OperatorBase { * scope will be used instead. If no OpContext is provicded, default context * will be used. 
*/ - void Run(const Scope& scope, + void Run(const framework::Scope& scope, const platform::DeviceContext& dev_ctx) const override { for (auto& op : ops_) { op->Run(scope, dev_ctx); @@ -88,7 +90,7 @@ class NetOp : public OperatorBase { std::vector> ops_; - private: +private: bool add_op_done_{false}; template @@ -97,5 +99,5 @@ class NetOp : public OperatorBase { } }; -} // namespace framework +} // namespace operators } // namespace paddle diff --git a/paddle/framework/net_design.md b/paddle/operators/net_op_design.md similarity index 100% rename from paddle/framework/net_design.md rename to paddle/operators/net_op_design.md diff --git a/paddle/framework/net_op_test.cc b/paddle/operators/net_op_test.cc similarity index 91% rename from paddle/framework/net_op_test.cc rename to paddle/operators/net_op_test.cc index f32e456e5d..18c5c60eb4 100644 --- a/paddle/framework/net_op_test.cc +++ b/paddle/operators/net_op_test.cc @@ -1,16 +1,18 @@ +#include "paddle/operators/net_op.h" + #include -#include -#include -#include + +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" namespace paddle { -namespace framework { +namespace operators { static int infer_shape_cnt = 0; static int run_cnt = 0; class TestOp : public OperatorBase { - public: +public: void InferShape(const framework::Scope& scope) const override { ++infer_shape_cnt; } @@ -21,7 +23,7 @@ class TestOp : public OperatorBase { }; class EmptyOp : public OperatorBase { - public: +public: void InferShape(const Scope& scope) const override {} void Run(const Scope& scope, const platform::DeviceContext& dev_ctx) const override {} @@ -73,7 +75,7 @@ TEST(OpKernel, all) { ASSERT_THROW(net->AddOp(op2), paddle::platform::EnforceNotMet); } -TEST(Net, insert_op) { +TEST(NetOp, insert_op) { NetOp net; auto op1 = std::make_shared(); op1->inputs_ = {"x", "w1", "b1"}; @@ -85,5 +87,5 @@ TEST(Net, insert_op) { ASSERT_EQ(3UL, net.ops_.size()); } -} // namespace framework +} // namespace operators } // namespace paddle diff --git a/paddle/operators/recurrent_op.cc b/paddle/operators/recurrent_op.cc index e5b76e3724..aeb95569b7 100644 --- a/paddle/operators/recurrent_op.cc +++ b/paddle/operators/recurrent_op.cc @@ -18,8 +18,8 @@ #include #include -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" #include "paddle/platform/enforce.h" namespace paddle { diff --git a/paddle/operators/recurrent_op_test.cc b/paddle/operators/recurrent_op_test.cc index 91f2972ca4..08a6d9fe56 100644 --- a/paddle/operators/recurrent_op_test.cc +++ b/paddle/operators/recurrent_op_test.cc @@ -11,14 +11,15 @@ limitations under the License. 
*/ +#include "paddle/operators/recurrent_op.h" + #include #include -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/operator.h" #include "paddle/framework/tensor.h" -#include "paddle/operators/recurrent_op.h" +#include "paddle/operators/net_op.h" namespace paddle { namespace operators { diff --git a/paddle/operators/type_alias.h b/paddle/operators/type_alias.h index 4ee08a099d..931740e150 100644 --- a/paddle/operators/type_alias.h +++ b/paddle/operators/type_alias.h @@ -15,8 +15,8 @@ #pragma once #include "paddle/framework/eigen.h" -#include "paddle/framework/net.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/net_op.h" namespace paddle { namespace operators { @@ -44,15 +44,16 @@ template using EigenTensor = framework::EigenTensor; using Tensor = framework::Tensor; +using Scope = framework::Scope; using OperatorWithKernel = framework::OperatorWithKernel; +using OperatorBase = framework::OperatorBase; using OpProtoAndCheckerMaker = framework::OpProtoAndCheckerMaker; using OpProto = framework::OpProto; using OpAttrChecker = framework::OpAttrChecker; using CPUPlace = platform::CPUPlace; using GPUPlace = platform::GPUPlace; -using NetOp = framework::NetOp; using OpRegistry = framework::OpRegistry; -using OperatorBase = framework::OperatorBase; + } // namespace operators } // namespace paddle From 647121aad3d9e3af753aaa858c43ff57c724571a Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 3 Aug 2017 14:08:42 -0700 Subject: [PATCH 24/25] Add unittest for GradOpBuilder --- paddle/framework/grad_op_builder.cc | 6 +- paddle/framework/grad_op_builder_test.cc | 129 ++++++++++++++++++++++- 2 files changed, 127 insertions(+), 8 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 34722fedf9..ea5e939c6e 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -26,7 +26,7 @@ using VarIndexMap = std::unordered_map; enum class OpArgType { IN, OUT }; static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_name"; + std::string key = type == OpArgType::IN ? "input_format" : "output_format"; return op->attrs_.count(key) ? &boost::get>(op->attrs_.at(key)) : nullptr; @@ -34,7 +34,7 @@ static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { static const std::vector* GetOpFormat(const OperatorBase* op, const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_name"; + std::string key = type == OpArgType::IN ? "input_format" : "output_format"; return op->attrs_.count(key) ? 
&boost::get>(op->attrs_.at(key)) : nullptr; @@ -84,7 +84,7 @@ OperatorBase* BuildGradOp(const OperatorBase* op) { grad_op->attrs_ = op->attrs_; grad_op->attrs_.erase("input_format"); grad_op->attrs_.erase("output_format"); - if (GetOpFormat(op, OpArgType::OUT) != nullptr) { + if (GetOpFormat(op, OpArgType::IN) != nullptr) { grad_op->attrs_["output_format"] = std::vector({0}); } if (GetOpFormat(op, OpArgType::IN) != nullptr || diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index e9cf3b9798..3bc47e6f42 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -8,10 +8,49 @@ USE_OP(add_two); namespace paddle { namespace framework { +class EmptyOp : public OperatorBase { + public: + void InferShape(const Scope &scope) const override {} + void Run(const Scope &scope, + const platform::DeviceContext &dev_ctx) const override {} +}; + +class MutiInOutOpMaker : public OpProtoAndCheckerMaker { + public: + MutiInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("In1", "a single input"); + AddInput("In2_mult", "a multiple input").SetMultiple(); + AddInput("In3", "another single input"); + AddOutput("Out1", "a single output"); + AddOutput("Out2_mult", "a multiple output").SetMultiple(); + AddComment("test op with multiple inputs and outputs"); + } +}; + +class IOIgnoredOpMaker : public OpProtoAndCheckerMaker { + public: + IOIgnoredOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("In1", "a single input"); + AddInput("In2_mult", "a multiple input").SetMultiple().IgnoreGradient(); + AddInput("In3_mult", "another multiple input").SetMultiple(); + AddOutput("Out1_mult", "a multiple output").SetMultiple(); + AddOutput("Out2", "a single output").IgnoreGradient(); + AddComment("op with inputs and outputs ignored in gradient calculating"); + } +}; + +} // namespace framework +} // namespace paddle + +namespace f = paddle::framework; + TEST(GradOpBuilder, AddTwo) { - std::shared_ptr add_op( - OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {})); - std::shared_ptr grad_add_op = OpRegistry::CreateGradOp(*add_op); + std::shared_ptr add_op( + f::OpRegistry::CreateOp("add_two", {"x", "y"}, {"out"}, {})); + std::shared_ptr grad_add_op = + f::OpRegistry::CreateGradOp(*add_op); EXPECT_EQ(static_cast(grad_add_op->inputs_.size()), 4); EXPECT_EQ(static_cast(grad_add_op->outputs_.size()), 2); EXPECT_EQ(grad_add_op->Input("X"), "x"); @@ -22,5 +61,85 @@ TEST(GradOpBuilder, AddTwo) { EXPECT_EQ(grad_add_op->Output("Y@GRAD"), "y@GRAD"); } -} // namespace framework -} // namespace paddle \ No newline at end of file +REGISTER_OP(mult_io, f::EmptyOp, f::MutiInOutOpMaker); +REGISTER_GRADIENT_OP(mult_io, mult_io_grad, f::EmptyOp); +REGISTER_OP(io_ignored, f::EmptyOp, f::IOIgnoredOpMaker); +REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::EmptyOp); + +TEST(GradOpBuilder, MutiInOut) { + f::AttributeMap attrs{{"input_format", std::vector{0, 1, 4, 5}}, + {"output_format", std::vector{0, 1, 3}}}; + std::shared_ptr test_op(f::OpRegistry::CreateOp( + "mult_io", {"in1", "in2_1", "in2_2", "in2_3", "in3"}, + {"out1", "out2_1", "out2_2"}, attrs)); + std::shared_ptr grad_test_op = + f::OpRegistry::CreateGradOp(*test_op); + + ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL); + EXPECT_EQ(grad_test_op->Input("In1"), "in1"); + EXPECT_EQ(grad_test_op->Inputs("In2_mult"), + std::vector({"in2_1", "in2_2", "in2_3"})); + 
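// The generated grad op takes the forward op's inputs (I), its outputs (O) and
// the gradients of those outputs (OG) as inputs, and emits the gradients of
// the forward inputs (IG) as outputs, mirroring the four TransOpArg calls in
// BuildGradOp.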
EXPECT_EQ(grad_test_op->Input("In3"), "in3"); + EXPECT_EQ(grad_test_op->Input("Out1"), "out1"); + EXPECT_EQ(grad_test_op->Inputs("Out2_mult"), + std::vector({"out2_1", "out2_2"})); + EXPECT_EQ(grad_test_op->Input("Out1" + f::OperatorBase::GRAD_VAR_SUFFIX()), + "out1" + f::OperatorBase::GRAD_VAR_SUFFIX()); + EXPECT_EQ( + grad_test_op->Inputs("Out2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()), + std::vector( + {"out2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(), + "out2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()})); + + ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); + EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()), + "in1" + f::OperatorBase::GRAD_VAR_SUFFIX()); + EXPECT_EQ( + grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()), + std::vector({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(), + "in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX(), + "in2_3" + f::OperatorBase::GRAD_VAR_SUFFIX()})); + EXPECT_EQ(grad_test_op->Output("In3" + f::OperatorBase::GRAD_VAR_SUFFIX()), + "in3" + f::OperatorBase::GRAD_VAR_SUFFIX()); +} + +TEST(GradOpBuilder, IOIgnoredInGradient) { + f::AttributeMap attrs{{"input_format", std::vector{0, 1, 3, 5}}, + {"output_format", std::vector{0, 2, 3}}}; + std::shared_ptr test_op(f::OpRegistry::CreateOp( + "io_ignored", {"in1", "in2_1", "in2_2", "in3_1", "in3_2"}, + {"out1_1", "out1_2", "out2"}, attrs)); + std::shared_ptr grad_test_op = + f::OpRegistry::CreateGradOp(*test_op); + + // 'In2' and 'Out2' are ignored in gradient calculating + ASSERT_EQ(grad_test_op->inputs_.size(), 5UL + 3UL + 3UL); + EXPECT_EQ(grad_test_op->Input("In1"), "in1"); + EXPECT_EQ(grad_test_op->Inputs("In2_mult"), + std::vector({f::OperatorBase::EMPTY_VAR_NAME(), + f::OperatorBase::EMPTY_VAR_NAME()})); + EXPECT_EQ(grad_test_op->Inputs("In3_mult"), + std::vector({"in3_1", "in3_2"})); + EXPECT_EQ(grad_test_op->Inputs("Out1_mult"), + std::vector({"out1_1", "out1_2"})); + EXPECT_EQ(grad_test_op->Input("Out2"), f::OperatorBase::EMPTY_VAR_NAME()); + EXPECT_EQ( + grad_test_op->Inputs("Out1_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()), + std::vector( + {"out1_1" + f::OperatorBase::GRAD_VAR_SUFFIX(), + "out1_2" + f::OperatorBase::GRAD_VAR_SUFFIX()})); + EXPECT_EQ(grad_test_op->Input("Out2" + f::OperatorBase::GRAD_VAR_SUFFIX()), + "out2" + f::OperatorBase::GRAD_VAR_SUFFIX()); + + ASSERT_EQ(grad_test_op->outputs_.size(), 5UL); + EXPECT_EQ(grad_test_op->Output("In1" + f::OperatorBase::GRAD_VAR_SUFFIX()), + "in1" + f::OperatorBase::GRAD_VAR_SUFFIX()); + EXPECT_EQ( + grad_test_op->Outputs("In2_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()), + std::vector({"in2_1" + f::OperatorBase::GRAD_VAR_SUFFIX(), + "in2_2" + f::OperatorBase::GRAD_VAR_SUFFIX()})); + EXPECT_EQ( + grad_test_op->Outputs("In3_mult" + f::OperatorBase::GRAD_VAR_SUFFIX()), + std::vector({"in3_1" + f::OperatorBase::GRAD_VAR_SUFFIX(), + "in3_2" + f::OperatorBase::GRAD_VAR_SUFFIX()})); +} From d12b1e77f4049b85643e852797bd40990e5cbc8b Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 3 Aug 2017 14:51:37 -0700 Subject: [PATCH 25/25] Rename EmptyOp into NOP --- paddle/framework/grad_op_builder_test.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/framework/grad_op_builder_test.cc b/paddle/framework/grad_op_builder_test.cc index 3bc47e6f42..96d7f309d6 100644 --- a/paddle/framework/grad_op_builder_test.cc +++ b/paddle/framework/grad_op_builder_test.cc @@ -8,7 +8,7 @@ USE_OP(add_two); namespace paddle { namespace framework { -class EmptyOp : public OperatorBase { +class NOP : public OperatorBase { 
public: void InferShape(const Scope &scope) const override {} void Run(const Scope &scope, @@ -61,10 +61,10 @@ TEST(GradOpBuilder, AddTwo) { EXPECT_EQ(grad_add_op->Output("Y@GRAD"), "y@GRAD"); } -REGISTER_OP(mult_io, f::EmptyOp, f::MutiInOutOpMaker); -REGISTER_GRADIENT_OP(mult_io, mult_io_grad, f::EmptyOp); -REGISTER_OP(io_ignored, f::EmptyOp, f::IOIgnoredOpMaker); -REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::EmptyOp); +REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker); +REGISTER_GRADIENT_OP(mult_io, mult_io_grad, f::NOP); +REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker); +REGISTER_GRADIENT_OP(io_ignored, io_ignored_grad, f::NOP); TEST(GradOpBuilder, MutiInOut) { f::AttributeMap attrs{{"input_format", std::vector{0, 1, 4, 5}},