diff --git a/doc/design/register_grad_op.md b/doc/design/register_grad_op.md index 12b04fb271..3cf8a59446 100644 --- a/doc/design/register_grad_op.md +++ b/doc/design/register_grad_op.md @@ -33,22 +33,45 @@ The mapping relationship between an operator and its gradient operators is a fun ```cpp // (OpDesc) --> vector -using GradOpDescMaker = std::function(const OpDesc&)>; +std::function(const OpDescBind&)>; ``` -The function take a `OpDesc` of the forward operator and return one or many gradient operator descriptions. +The function takes an `OpDescBind` of the forward operator and returns one or many gradient operator descriptions. `OpDescBind` is a C++ wrapper for protobuf message `OpDesc` to manipulate `OpDesc` fast. The `GradOpDescMaker` will be registered in `OpInfo`, to replace `grad_op_type_` field. The `OpInfo` should be ```cpp struct OpInfo { - GradOpDescMaker grad_op_maker_; + std::function>(const OpDescBind&)> grad_op_maker_; ... }; ``` The `grad_op_maker_ ` is `nullptr` if the operator does not have associated gradient operators. +We propose a base class called `GradOpDescMakerBase` to let operator developers generate `Gradient Operators` easily. The public interface of that class is + +```cpp +class GradOpDescMakerBase { +public: + GradOpDescMakerBase(const OpDescBind& ); + virtual std::vector> operator()()const = 0; +}; +``` + +We can convert `GradOpDescMakerBase` to `std::function>(const OpDescBind&)>` by + +```cpp +using GradOpMaker = ...; +std::function(const OpDescBind&)> func; +func = [] (const OpDescBind& fwd_op) { + GradOpMaker maker(fwd_op); + return maker(); +}; +``` + +We can write many helper functions since the `GradOpDescMakerBase` is a class now. The basic helper functions get the variables of `Input`, `Output`, `InputGradient` and `OutputGradient` in the forwarding operator. + We should chagne register macros at the same time. In the current solution, there is no difference between forwarding operators and backward operators. So `REGISTER_OP` just register one operator. If the `REGISTER_OPERATOR ` contains `OpProtoAndCheckerMaker` and `GradOpDescMaker`, we just list them in the same macro. It can be done by a macro contains `__VA_ARGS__`. The user interface should be diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc index 2c13ddd8d0..c0188c0e55 100644 --- a/paddle/framework/backward.cc +++ b/paddle/framework/backward.cc @@ -147,7 +147,7 @@ static std::unique_ptr BackwardRecursive( for (size_t output_idx = 0; output_idx < dup_outputs.size() - 1; ++output_idx) { auto insert_add_x = dup_outputs[output_idx]; - auto insert_add_y = dup_outputs[output_idx]; + auto insert_add_y = dup_outputs[output_idx + 1]; auto insert_add_out = name + "@SHARED@" + std::to_string(output_idx); // first add op inserted if (output_idx == dup_outputs.size() - 2) { @@ -158,9 +158,8 @@ static std::unique_ptr BackwardRecursive( } insert_position.push_back( {dup_op.back(), - OpRegistry::CreateOp( - "sum", {{"X", {insert_add_x}}, {"X", {insert_add_y}}}, - {{"Out", {insert_add_out}}}, {})}); + OpRegistry::CreateOp("sum", {{"X", {insert_add_x, insert_add_y}}}, + {{"Out", {insert_add_out}}}, {})}); } } @@ -200,7 +199,8 @@ static std::unique_ptr BackwardRecursive( // process recurrent gradient op as a special operator. if (forwardOp.Type() == "recurrent") { - // NOTE clean up cycle call somewhere (RNN's stepnet constains itself), or + // NOTE clean up cycle call somewhere (RNN's stepnet constains itself), + // or // this will result in infinite loop. const auto& rnnop = *static_cast(&forwardOp); diff --git a/paddle/framework/details/op_registry.h b/paddle/framework/details/op_registry.h index d2516ccc1e..daa474e8c5 100644 --- a/paddle/framework/details/op_registry.h +++ b/paddle/framework/details/op_registry.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/framework/grad_op_desc_maker.h" #include "paddle/framework/op_info.h" #include "paddle/framework/op_proto_maker.h" #include "paddle/framework/operator.h" @@ -96,7 +97,10 @@ struct OpInfoFiller { template struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { - info->grad_op_maker_ = new T(); + info->grad_op_maker_ = [](const OpDescBind& fwd_op) { + T maker(fwd_op); + return maker(); + }; } }; } // namespace details diff --git a/paddle/framework/grad_op_desc_maker.h b/paddle/framework/grad_op_desc_maker.h new file mode 100644 index 0000000000..e9ae6e2206 --- /dev/null +++ b/paddle/framework/grad_op_desc_maker.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once +#include "paddle/framework/op_desc.h" +#include "paddle/framework/operator.h" + +namespace paddle { +namespace framework { + +class GradOpDescMakerBase { + public: + explicit GradOpDescMakerBase(const OpDescBind& fwd_op) : fwd_op_(fwd_op) {} + + virtual ~GradOpDescMakerBase() = default; + virtual std::vector> operator()() const = 0; + + protected: + static std::vector ToGradNames( + const std::vector& var_names) { + std::vector ret_val; + ret_val.reserve(var_names.size()); + std::transform(var_names.begin(), var_names.end(), + std::back_inserter(ret_val), GradVarName); + return ret_val; + } + + std::vector InputGrad(const std::string& name) const { + return ToGradNames(fwd_op_.Input(name)); + } + + std::vector OutputGrad(const std::string& name) const { + return ToGradNames(fwd_op_.Output(name)); + } + + std::vector InputNames() const { + return this->fwd_op_.InputNames(); + } + + std::vector OutputNames() const { + return this->fwd_op_.OutputNames(); + } + + std::vector Input(const std::string& name) const { + return fwd_op_.Input(name); + } + + std::vector Output(const std::string& name) const { + return fwd_op_.Output(name); + } + + const std::unordered_map& Attrs() const { + return fwd_op_.GetAttrMap(); + } + + const Attribute& GetAttr(const std::string& name) const { + auto& map = fwd_op_.GetAttrMap(); + auto it = map.find(name); + PADDLE_ENFORCE(it != map.end(), "Cannot find attribute %s", name); + return it->second; + } + + std::string ForwardOpType() const { return this->fwd_op_.Type(); } + + private: + const OpDescBind& fwd_op_; +}; + +class SingleGradOpDescMaker : public GradOpDescMakerBase { + public: + using GradOpDescMakerBase::GradOpDescMakerBase; + + std::vector> operator()() const { + std::vector> retv; + retv.emplace_back(this->Apply()); + return retv; + } + + protected: + virtual std::unique_ptr Apply() const = 0; +}; + +class DefaultGradOpDescMaker : public SingleGradOpDescMaker { + public: + using SingleGradOpDescMaker::SingleGradOpDescMaker; + + protected: + virtual std::unique_ptr Apply() const { + auto* grad = new OpDescBind(); + grad->SetType(this->GradOpType()); + + for (auto& input_param : this->InputNames()) { + grad->SetInput(input_param, this->Input(input_param)); + grad->SetOutput(GradVarName(input_param), this->InputGrad(input_param)); + } + + for (auto& output_param : this->OutputNames()) { + grad->SetInput(output_param, this->Output(output_param)); + grad->SetInput(GradVarName(output_param), this->OutputGrad(output_param)); + } + + grad->SetAttrMap(this->Attrs()); + + return std::unique_ptr(grad); + } + + virtual std::string GradOpType() const { + return this->ForwardOpType() + "_grad"; + } +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/op_desc.cc b/paddle/framework/op_desc.cc index 33a064890c..852f0f1eb8 100644 --- a/paddle/framework/op_desc.cc +++ b/paddle/framework/op_desc.cc @@ -31,15 +31,6 @@ const std::vector &OpDescBind::Input( return it->second; } -std::vector OpDescBind::InputNames() const { - std::vector retv; - retv.reserve(this->inputs_.size()); - for (auto &ipt : this->inputs_) { - retv.push_back(ipt.first); - } - return retv; -} - void OpDescBind::SetInput(const std::string ¶m_name, const std::vector &args) { need_update_ = true; @@ -54,15 +45,6 @@ const std::vector &OpDescBind::Output( return it->second; } -std::vector OpDescBind::OutputNames() const { - std::vector retv; - retv.reserve(this->outputs_.size()); - for (auto &ipt : this->outputs_) { - retv.push_back(ipt.first); - } - return retv; -} - void OpDescBind::SetOutput(const std::string ¶m_name, const std::vector &args) { need_update_ = true; diff --git a/paddle/framework/op_desc.h b/paddle/framework/op_desc.h index 0af4169715..397393f796 100644 --- a/paddle/framework/op_desc.h +++ b/paddle/framework/op_desc.h @@ -35,15 +35,11 @@ class OpDescBind { const std::vector &Input(const std::string &name) const; - std::vector InputNames() const; - void SetInput(const std::string ¶m_name, const std::vector &args); const std::vector &Output(const std::string &name) const; - std::vector OutputNames() const; - void SetOutput(const std::string ¶m_name, const std::vector &args); @@ -61,9 +57,6 @@ class OpDescBind { void SetBlockAttr(const std::string &name, BlockDescBind &block); - // Only be used in C++ - void SetAttrMap(const AttributeMap &attr_map); - Attribute GetAttr(const std::string &name) const; int GetBlockAttr(const std::string &name) const; @@ -71,7 +64,23 @@ class OpDescBind { // Only be used in C++ const AttributeMap &GetAttrMap() const; + // Only be used in C++ + void SetAttrMap(const AttributeMap &attr_map); + + std::vector InputNames() const { return MapKeys(inputs_); } + std::vector OutputNames() const { return MapKeys(outputs_); } + private: + template + static std::vector MapKeys(const MapType &map) { + std::vector ret_val; + ret_val.reserve(map.size()); + std::transform( + map.begin(), map.end(), std::back_inserter(ret_val), + [](const typename MapType::value_type &pair) { return pair.first; }); + return ret_val; + } + void Sync(); OpDesc op_desc_; diff --git a/paddle/framework/op_info.h b/paddle/framework/op_info.h index 9672e540c8..8b7882485f 100644 --- a/paddle/framework/op_info.h +++ b/paddle/framework/op_info.h @@ -25,16 +25,10 @@ namespace paddle { namespace framework { -class GradOpDescMakerBase { - public: - virtual ~GradOpDescMakerBase() = default; - virtual std::vector operator()(const OpDescBind&) const = 0; -}; - struct OpInfo { OpCreator creator_; std::string grad_op_type_; - GradOpDescMakerBase* grad_op_maker_{nullptr}; + GradOpMakerFN grad_op_maker_; OpProto* proto_{nullptr}; OpAttrChecker* checker_{nullptr}; diff --git a/paddle/framework/type_defs.h b/paddle/framework/type_defs.h index dec5066f1e..a5b9472213 100644 --- a/paddle/framework/type_defs.h +++ b/paddle/framework/type_defs.h @@ -20,6 +20,7 @@ namespace paddle { namespace framework { class OperatorBase; +class OpDescBind; using VariableNameMap = std::map>; // The order should be as same as framework.proto @@ -34,5 +35,8 @@ using OpCreator = std::function; +using GradOpMakerFN = + std::function>(const OpDescBind&)>; + } // namespace framework } // namespace paddle diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 8f9eae4186..31d491f130 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -23,19 +23,22 @@ class SGDOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContextBase *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("param"), - "Input(param) of SGDOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("grad"), - "Input(grad) of SGDOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("learning_rate"), - "Input(learning_rate) of SGDOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("param_out"), - "Output(param_out) of SGDOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Param"), + "Input(Param) of SGDOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of SGDOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("LearningRate"), + "Input(LearningRate) of SGDOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), + "Output(ParamOut) of SGDOp should not be null."); - auto param_dim = ctx->GetInputDim("param"); - PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("grad"), + auto lr_dims = ctx->GetInputDim("LearningRate"); + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "Learning rate should have 1 element"); + auto param_dim = ctx->GetInputDim("Param"); + PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"), "Two input of SGD Op's dimension must be same."); - ctx->SetOutputDim("param_out", param_dim); + ctx->SetOutputDim("ParamOut", param_dim); } }; @@ -43,10 +46,10 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker { public: SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("param", "input parameter"); - AddInput("learning_rate", "learning rate of sgd"); - AddInput("grad", "input gradient"); - AddOutput("param_out", "output parameter"); + AddInput("Param", "Input parameter"); + AddInput("LearningRate", "Learning rate of SGD"); + AddInput("Grad", "Input gradient"); + AddOutput("ParamOut", "output parameter"); AddComment(R"DOC( Simplest sgd algorithm. diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h index 977d201ced..d72d333a9a 100644 --- a/paddle/operators/sgd_op.h +++ b/paddle/operators/sgd_op.h @@ -28,10 +28,10 @@ template class SGDOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto param = ctx.Input("param"); - auto grad = ctx.Input("grad"); - auto param_out = ctx.Output("param_out"); - float lr = *ctx.Input("learning_rate"); + auto param = ctx.Input("Param"); + auto grad = ctx.Input("Grad"); + auto param_out = ctx.Output("ParamOut"); + float lr = ctx.Input("LearningRate")->data()[0]; param_out->mutable_data(ctx.GetPlace()); diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py index f1125f4edb..2dd881e5e1 100644 --- a/python/paddle/v2/framework/tests/test_sgd_op.py +++ b/python/paddle/v2/framework/tests/test_sgd_op.py @@ -8,10 +8,10 @@ class TestSGDOp(OpTest): self.op_type = "sgd" w = np.random.random((102, 105)).astype("float32") g = np.random.random((102, 105)).astype("float32") - lr = 0.1 + lr = np.array([0.1]).astype("float32") - self.inputs = {'param': w, 'grad': g, 'learning_rate': lr} - self.outputs = {'param_out': w - lr * g} + self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr} + self.outputs = {'ParamOut': w - lr * g} def test_check_output(self): self.check_output()