From b0d2235834cd1a94c39c1e937f95c58bd7319abc Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Tue, 3 Oct 2017 16:24:24 -0700
Subject: [PATCH 1/4] Bug fix

---
 paddle/framework/backward.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 2c13ddd8d0..89583ade95 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -147,7 +147,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
       for (size_t output_idx = 0; output_idx < dup_outputs.size() - 1;
            ++output_idx) {
         auto insert_add_x = dup_outputs[output_idx];
-        auto insert_add_y = dup_outputs[output_idx];
+        auto insert_add_y = dup_outputs[output_idx + 1];
         auto insert_add_out = name + "@SHARED@" + std::to_string(output_idx);
         // first add op inserted
         if (output_idx == dup_outputs.size() - 2) {

From f4491fa46d1583caa7f007a581995435a32f8dab Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Tue, 3 Oct 2017 16:34:21 -0700
Subject: [PATCH 2/4] Fix bug

---
 paddle/framework/backward.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 89583ade95..c0188c0e55 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -158,9 +158,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
         }
         insert_position.push_back(
            {dup_op.back(),
-            OpRegistry::CreateOp(
-                "sum", {{"X", {insert_add_x}}, {"X", {insert_add_y}}},
-                {{"Out", {insert_add_out}}}, {})});
+            OpRegistry::CreateOp("sum", {{"X", {insert_add_x, insert_add_y}}},
+                                 {{"Out", {insert_add_out}}}, {})});
       }
     }

@@ -200,7 +199,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(

   // process recurrent gradient op as a special operator.
   if (forwardOp.Type() == "recurrent") {
-    // NOTE clean up cycle call somewhere (RNN's stepnet constains itself), or
+    // NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
+    // or
     // this will result in infinite loop.
    const auto& rnnop = *static_cast<const operators::RecurrentOp*>(&forwardOp);
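Patches 1 and 2 jointly repair how BackwardRecursive reduces duplicated gradient
outputs: patch 1 pairs each duplicate with the *next* one
(dup_outputs[output_idx + 1]) instead of adding an entry to itself, and patch 2
passes both names through the single duplicable "X" input of one "sum" op
rather than listing the "X" key twice, which a map-backed name table would
collapse to a single entry. Below is a minimal, self-contained sketch of the
loop as patched; SumOp and the main() harness are illustrative stand-ins, not
PaddlePaddle types:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for the operator that OpRegistry::CreateOp("sum", ...) builds:
    // a list of input names feeding one output name.
    struct SumOp {
      std::vector<std::string> x;  // the duplicable "X" input slot
      std::string out;             // the "Out" output slot
    };

    int main() {
      // Three ops wrote gradients for the same variable "x@GRAD".
      const std::string name = "x@GRAD";
      const std::vector<std::string> dup_outputs = {"x@GRAD@0", "x@GRAD@1",
                                                    "x@GRAD@2"};

      std::vector<SumOp> inserted;
      for (size_t output_idx = 0; output_idx < dup_outputs.size() - 1;
           ++output_idx) {
        auto insert_add_x = dup_outputs[output_idx];
        // Patch 1's fix: the second summand is the NEXT duplicate; before the
        // fix both summands were dup_outputs[output_idx].
        auto insert_add_y = dup_outputs[output_idx + 1];
        auto insert_add_out = name + "@SHARED@" + std::to_string(output_idx);
        // The last inserted op writes the final gradient name.
        if (output_idx == dup_outputs.size() - 2) {
          insert_add_out = name;
        }
        // Patch 2's fix: one "X" slot holding both input names.
        inserted.push_back(SumOp{{insert_add_x, insert_add_y}, insert_add_out});
      }

      for (const auto& op : inserted) {
        std::cout << "sum(" << op.x[0] << ", " << op.x[1] << ") -> " << op.out
                  << "\n";
      }
      return 0;
    }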
From 324876bbbfb0dd84f2172f951a2a4880bee32df4 Mon Sep 17 00:00:00 2001
From: Abhinav Arora
Date: Tue, 3 Oct 2017 17:26:02 -0700
Subject: [PATCH 3/4] Changing learning rate from type Input(float) to
 Input(tensor) (#4578)

---
 paddle/operators/sgd_op.cc                      | 3 +++
 paddle/operators/sgd_op.h                       | 2 +-
 python/paddle/v2/framework/tests/test_sgd_op.py | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc
index 8f9eae4186..1a4d3fb8c5 100644
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -32,6 +32,9 @@ class SGDOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("param_out"),
                    "Output(param_out) of SGDOp should not be null.");

+    auto lr_dims = ctx->GetInputDim("learning_rate");
+    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
+                      "Learning rate should have 1 element");
     auto param_dim = ctx->GetInputDim("param");
     PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("grad"),
                       "Two input of SGD Op's dimension must be same.");

diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h
index 977d201ced..e2ae65beb0 100644
--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
@@ -31,7 +31,7 @@ class SGDOpKernel : public framework::OpKernel {
     auto param = ctx.Input<Tensor>("param");
     auto grad = ctx.Input<Tensor>("grad");
     auto param_out = ctx.Output<Tensor>("param_out");
-    float lr = *ctx.Input<float>("learning_rate");
+    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];

     param_out->mutable_data<T>(ctx.GetPlace());

diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index f1125f4edb..c05364490f 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -8,7 +8,7 @@ class TestSGDOp(OpTest):
         self.op_type = "sgd"
         w = np.random.random((102, 105)).astype("float32")
         g = np.random.random((102, 105)).astype("float32")
-        lr = 0.1
+        lr = np.array([0.1]).astype("float32")

         self.inputs = {'param': w, 'grad': g, 'learning_rate': lr}
         self.outputs = {'param_out': w - lr * g}
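Patch 3 turns the learning rate from a bare float input into a one-element
tensor, so shape inference can validate it like any other input and the kernel
reads element 0 of the tensor's data. A tensor-valued learning rate can also
live alongside the other inputs and be produced by another op, which a raw
float could not. A minimal sketch of the pattern, with plain C++ stand-ins
(Dims, product) for the framework types:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Stand-ins for framework::DDim and framework::product().
    using Dims = std::vector<int64_t>;

    int64_t product(const Dims& dims) {
      return std::accumulate(dims.begin(), dims.end(), int64_t{1},
                             std::multiplies<int64_t>());
    }

    int main() {
      // The learning rate now arrives as a tensor of shape {1} ...
      const Dims lr_dims = {1};
      const std::vector<float> lr_data = {0.1f};

      // ... so shape inference can enforce "exactly one element" up front,
      // mirroring PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, ...).
      assert(product(lr_dims) == 1 && "Learning rate should have 1 element");

      // The kernel then reads element 0 instead of dereferencing a float input.
      const float lr = lr_data[0];

      // SGD update, as the test expects: param_out = param - lr * grad.
      std::vector<float> param = {1.0f, 2.0f, 3.0f};
      std::vector<float> grad = {0.1f, 0.2f, 0.3f};
      std::vector<float> param_out(param.size());
      for (size_t i = 0; i < param.size(); ++i) {
        param_out[i] = param[i] - lr * grad[i];
      }
      assert(std::fabs(param_out[0] - 0.99f) < 1e-6f);
      return 0;
    }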
From eed2c1e1d6237f421c9b8c0bbd2fd51d53beddcf Mon Sep 17 00:00:00 2001
From: Abhinav Arora
Date: Wed, 4 Oct 2017 09:29:13 -0700
Subject: [PATCH 4/4] Changing SGD inputs and outputs to conform to Operator
 naming convention (#4586)

---
 paddle/operators/sgd_op.cc                    | 32 +++++++++----------
 paddle/operators/sgd_op.h                     |  8 ++---
 .../paddle/v2/framework/tests/test_sgd_op.py |  4 +--
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc
index 1a4d3fb8c5..31d491f130 100644
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -23,22 +23,22 @@ class SGDOp : public framework::OperatorWithKernel {

 protected:
  void InferShape(framework::InferShapeContextBase *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("param"),
-                   "Input(param) of SGDOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("grad"),
-                   "Input(grad) of SGDOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
-                   "Input(learning_rate) of SGDOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("param_out"),
-                   "Output(param_out) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Param"),
+                   "Input(Param) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grad"),
+                   "Input(Grad) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
+                   "Input(LearningRate) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
+                   "Output(ParamOut) of SGDOp should not be null.");

-    auto lr_dims = ctx->GetInputDim("learning_rate");
+    auto lr_dims = ctx->GetInputDim("LearningRate");
     PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                       "Learning rate should have 1 element");
-    auto param_dim = ctx->GetInputDim("param");
-    PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("grad"),
+    auto param_dim = ctx->GetInputDim("Param");
+    PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"),
                       "Two input of SGD Op's dimension must be same.");
-    ctx->SetOutputDim("param_out", param_dim);
+    ctx->SetOutputDim("ParamOut", param_dim);
   }
 };

@@ -46,10 +46,10 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("param", "input parameter");
-    AddInput("learning_rate", "learning rate of sgd");
-    AddInput("grad", "input gradient");
-    AddOutput("param_out", "output parameter");
+    AddInput("Param", "Input parameter");
+    AddInput("LearningRate", "Learning rate of SGD");
+    AddInput("Grad", "Input gradient");
+    AddOutput("ParamOut", "output parameter");

     AddComment(R"DOC(
 Simplest sgd algorithm.

diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h
index e2ae65beb0..d72d333a9a 100644
--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
@@ -28,10 +28,10 @@ template <typename Place, typename T>
 class SGDOpKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param = ctx.Input<Tensor>("param");
-    auto grad = ctx.Input<Tensor>("grad");
-    auto param_out = ctx.Output<Tensor>("param_out");
-    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];
+    auto param = ctx.Input<Tensor>("Param");
+    auto grad = ctx.Input<Tensor>("Grad");
+    auto param_out = ctx.Output<Tensor>("ParamOut");
+    float lr = ctx.Input<Tensor>("LearningRate")->data<float>()[0];

     param_out->mutable_data<T>(ctx.GetPlace());

diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index c05364490f..2dd881e5e1 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -10,8 +10,8 @@ class TestSGDOp(OpTest):
         g = np.random.random((102, 105)).astype("float32")
         lr = np.array([0.1]).astype("float32")

-        self.inputs = {'param': w, 'grad': g, 'learning_rate': lr}
-        self.outputs = {'param_out': w - lr * g}
+        self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
+        self.outputs = {'ParamOut': w - lr * g}

     def test_check_output(self):
         self.check_output()
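Patch 4 is a pure rename to the Operator naming convention (param -> Param,
grad -> Grad, learning_rate -> LearningRate, param_out -> ParamOut); the
computation is unchanged, so the Python test only re-keys its input and output
dicts. A plain C++ sketch of the invariant the updated test still checks, using
a hypothetical map-of-vectors in place of the OpTest feed dict:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Inputs keyed by the new CamelCase names from patch 4.
      std::map<std::string, std::vector<float>> inputs = {
          {"Param", {1.0f, 2.0f, 3.0f}},
          {"Grad", {0.1f, 0.2f, 0.3f}},
          {"LearningRate", {0.1f}}};  // one-element tensor, per patch 3

      const float lr = inputs["LearningRate"][0];
      const std::vector<float>& param = inputs["Param"];
      const std::vector<float>& grad = inputs["Grad"];

      // Expected output under the renamed key: ParamOut = Param - lr * Grad.
      std::map<std::string, std::vector<float>> outputs;
      for (size_t i = 0; i < param.size(); ++i) {
        outputs["ParamOut"].push_back(param[i] - lr * grad[i]);
      }

      const std::vector<float> expected = {0.99f, 1.98f, 2.97f};
      for (size_t i = 0; i < expected.size(); ++i) {
        assert(std::fabs(outputs["ParamOut"][i] - expected[i]) < 1e-6f);
      }
      return 0;
    }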