From b0d2235834cd1a94c39c1e937f95c58bd7319abc Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Tue, 3 Oct 2017 16:24:24 -0700
Subject: [PATCH 1/4] Bug fix

---
 paddle/framework/backward.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 2c13ddd8d0..89583ade95 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -147,7 +147,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
       for (size_t output_idx = 0; output_idx < dup_outputs.size() - 1;
            ++output_idx) {
         auto insert_add_x = dup_outputs[output_idx];
-        auto insert_add_y = dup_outputs[output_idx];
+        auto insert_add_y = dup_outputs[output_idx + 1];
         auto insert_add_out = name + "@SHARED@" + std::to_string(output_idx);
         // first add op inserted
         if (output_idx == dup_outputs.size() - 2) {

From f4491fa46d1583caa7f007a581995435a32f8dab Mon Sep 17 00:00:00 2001
From: fengjiayi
Date: Tue, 3 Oct 2017 16:34:21 -0700
Subject: [PATCH 2/4] Fix bug

---
 paddle/framework/backward.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 89583ade95..c0188c0e55 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -158,9 +158,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
         }
         insert_position.push_back(
            {dup_op.back(),
-            OpRegistry::CreateOp(
-                "sum", {{"X", {insert_add_x}}, {"X", {insert_add_y}}},
-                {{"Out", {insert_add_out}}}, {})});
+            OpRegistry::CreateOp("sum", {{"X", {insert_add_x, insert_add_y}}},
+                                 {{"Out", {insert_add_out}}}, {})});
       }
     }

@@ -200,7 +199,8 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(

   // process recurrent gradient op as a special operator.
   if (forwardOp.Type() == "recurrent") {
-    // NOTE clean up cycle call somewhere (RNN's stepnet constains itself), or
+    // NOTE clean up cycle call somewhere (RNN's stepnet constains itself),
+    // or
     // this will result in infinite loop.
    const auto& rnnop = *static_cast<const operators::RecurrentOp*>(&forwardOp);
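Patches 1 and 2 jointly repair how BackwardRecursive reduces duplicated gradient
outputs: patch 1 pairs each duplicate with the *next* one
(dup_outputs[output_idx + 1]) instead of adding an entry to itself, and patch 2
passes both names through the single duplicable "X" input of one "sum" op
rather than listing the "X" key twice, which a map-backed name table would
collapse to a single entry. Below is a minimal, self-contained sketch of the
loop as patched; SumOp and the main() harness are illustrative stand-ins, not
PaddlePaddle types:

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for the operator that OpRegistry::CreateOp("sum", ...) builds:
    // a list of input names feeding one output name.
    struct SumOp {
      std::vector<std::string> x;  // the duplicable "X" input slot
      std::string out;             // the "Out" output slot
    };

    int main() {
      // Three ops wrote gradients for the same variable "x@GRAD".
      const std::string name = "x@GRAD";
      const std::vector<std::string> dup_outputs = {"x@GRAD@0", "x@GRAD@1",
                                                    "x@GRAD@2"};

      std::vector<SumOp> inserted;
      for (size_t output_idx = 0; output_idx < dup_outputs.size() - 1;
           ++output_idx) {
        auto insert_add_x = dup_outputs[output_idx];
        // Patch 1's fix: the second summand is the NEXT duplicate; before the
        // fix both summands were dup_outputs[output_idx].
        auto insert_add_y = dup_outputs[output_idx + 1];
        auto insert_add_out = name + "@SHARED@" + std::to_string(output_idx);
        // The last inserted op writes the final gradient name.
        if (output_idx == dup_outputs.size() - 2) {
          insert_add_out = name;
        }
        // Patch 2's fix: one "X" slot holding both input names.
        inserted.push_back(SumOp{{insert_add_x, insert_add_y}, insert_add_out});
      }

      for (const auto& op : inserted) {
        std::cout << "sum(" << op.x[0] << ", " << op.x[1] << ") -> " << op.out
                  << "\n";
      }
      return 0;
    }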
From 324876bbbfb0dd84f2172f951a2a4880bee32df4 Mon Sep 17 00:00:00 2001
From: Abhinav Arora
Date: Tue, 3 Oct 2017 17:26:02 -0700
Subject: [PATCH 3/4] Changing learning rate from type Input(float) to
 Input(tensor) (#4578)

---
 paddle/operators/sgd_op.cc                      | 3 +++
 paddle/operators/sgd_op.h                       | 2 +-
 python/paddle/v2/framework/tests/test_sgd_op.py | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc
index 8f9eae4186..1a4d3fb8c5 100644
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -32,6 +32,9 @@ class SGDOp : public framework::OperatorWithKernel {
     PADDLE_ENFORCE(ctx->HasOutput("param_out"),
                    "Output(param_out) of SGDOp should not be null.");

+    auto lr_dims = ctx->GetInputDim("learning_rate");
+    PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
+                      "Learning rate should have 1 element");
     auto param_dim = ctx->GetInputDim("param");
     PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("grad"),
                       "Two input of SGD Op's dimension must be same.");

diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h
index 977d201ced..e2ae65beb0 100644
--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
@@ -31,7 +31,7 @@ class SGDOpKernel : public framework::OpKernel {
     auto param = ctx.Input<Tensor>("param");
     auto grad = ctx.Input<Tensor>("grad");
     auto param_out = ctx.Output<Tensor>("param_out");
-    float lr = *ctx.Input<float>("learning_rate");
+    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];

     param_out->mutable_data<T>(ctx.GetPlace());

diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index f1125f4edb..c05364490f 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -8,7 +8,7 @@ class TestSGDOp(OpTest):
         self.op_type = "sgd"
         w = np.random.random((102, 105)).astype("float32")
         g = np.random.random((102, 105)).astype("float32")
-        lr = 0.1
+        lr = np.array([0.1]).astype("float32")

         self.inputs = {'param': w, 'grad': g, 'learning_rate': lr}
         self.outputs = {'param_out': w - lr * g}
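Patch 3 turns the learning rate from a bare float input into a one-element
tensor, so shape inference can validate it like any other input and the kernel
reads element 0 of the tensor's data. A tensor-valued learning rate can also
live alongside the other inputs and be produced by another op, which a raw
float could not. A minimal sketch of the pattern, with plain C++ stand-ins
(Dims, product) for the framework types:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Stand-ins for framework::DDim and framework::product().
    using Dims = std::vector<int64_t>;

    int64_t product(const Dims& dims) {
      return std::accumulate(dims.begin(), dims.end(), int64_t{1},
                             std::multiplies<int64_t>());
    }

    int main() {
      // The learning rate now arrives as a tensor of shape {1} ...
      const Dims lr_dims = {1};
      const std::vector<float> lr_data = {0.1f};

      // ... so shape inference can enforce "exactly one element" up front,
      // mirroring PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, ...).
      assert(product(lr_dims) == 1 && "Learning rate should have 1 element");

      // The kernel then reads element 0 instead of dereferencing a float input.
      const float lr = lr_data[0];

      // SGD update, as the test expects: param_out = param - lr * grad.
      std::vector<float> param = {1.0f, 2.0f, 3.0f};
      std::vector<float> grad = {0.1f, 0.2f, 0.3f};
      std::vector<float> param_out(param.size());
      for (size_t i = 0; i < param.size(); ++i) {
        param_out[i] = param[i] - lr * grad[i];
      }
      assert(std::fabs(param_out[0] - 0.99f) < 1e-6f);
      return 0;
    }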
From eed2c1e1d6237f421c9b8c0bbd2fd51d53beddcf Mon Sep 17 00:00:00 2001
From: Abhinav Arora
Date: Wed, 4 Oct 2017 09:29:13 -0700
Subject: [PATCH 4/4] Changing SGD inputs and outputs to conform to Operator
 naming convention (#4586)

---
 paddle/operators/sgd_op.cc                    | 32 +++++++++----------
 paddle/operators/sgd_op.h                     |  8 ++---
 .../paddle/v2/framework/tests/test_sgd_op.py |  4 +--
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc
index 1a4d3fb8c5..31d491f130 100644
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -23,22 +23,22 @@ class SGDOp : public framework::OperatorWithKernel {

 protected:
  void InferShape(framework::InferShapeContextBase *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("param"),
-                   "Input(param) of SGDOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("grad"),
-                   "Input(grad) of SGDOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
-                   "Input(learning_rate) of SGDOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("param_out"),
-                   "Output(param_out) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Param"),
+                   "Input(Param) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grad"),
+                   "Input(Grad) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
+                   "Input(LearningRate) of SGDOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
+                   "Output(ParamOut) of SGDOp should not be null.");

-    auto lr_dims = ctx->GetInputDim("learning_rate");
+    auto lr_dims = ctx->GetInputDim("LearningRate");
     PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
                       "Learning rate should have 1 element");
-    auto param_dim = ctx->GetInputDim("param");
-    PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("grad"),
+    auto param_dim = ctx->GetInputDim("Param");
+    PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Grad"),
                       "Two input of SGD Op's dimension must be same.");
-    ctx->SetOutputDim("param_out", param_dim);
+    ctx->SetOutputDim("ParamOut", param_dim);
   }
 };

@@ -46,10 +46,10 @@ class SGDOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   SGDOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("param", "input parameter");
-    AddInput("learning_rate", "learning rate of sgd");
-    AddInput("grad", "input gradient");
-    AddOutput("param_out", "output parameter");
+    AddInput("Param", "Input parameter");
+    AddInput("LearningRate", "Learning rate of SGD");
+    AddInput("Grad", "Input gradient");
+    AddOutput("ParamOut", "output parameter");

     AddComment(R"DOC(
 Simplest sgd algorithm.

diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h
index e2ae65beb0..d72d333a9a 100644
--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
@@ -28,10 +28,10 @@ template <typename Place, typename T>
 class SGDOpKernel : public framework::OpKernel {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param = ctx.Input<Tensor>("param");
-    auto grad = ctx.Input<Tensor>("grad");
-    auto param_out = ctx.Output<Tensor>("param_out");
-    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];
+    auto param = ctx.Input<Tensor>("Param");
+    auto grad = ctx.Input<Tensor>("Grad");
+    auto param_out = ctx.Output<Tensor>("ParamOut");
+    float lr = ctx.Input<Tensor>("LearningRate")->data<float>()[0];

     param_out->mutable_data<T>(ctx.GetPlace());

diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index c05364490f..2dd881e5e1 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -10,8 +10,8 @@ class TestSGDOp(OpTest):
         g = np.random.random((102, 105)).astype("float32")
         lr = np.array([0.1]).astype("float32")

-        self.inputs = {'param': w, 'grad': g, 'learning_rate': lr}
-        self.outputs = {'param_out': w - lr * g}
+        self.inputs = {'Param': w, 'Grad': g, 'LearningRate': lr}
+        self.outputs = {'ParamOut': w - lr * g}

     def test_check_output(self):
         self.check_output()
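Patch 4 is a pure rename to the Operator naming convention (param -> Param,
grad -> Grad, learning_rate -> LearningRate, param_out -> ParamOut); the
computation is unchanged, so the Python test only re-keys its input and output
dicts. A plain C++ sketch of the invariant the updated test still checks, using
a hypothetical map-of-vectors in place of the OpTest feed dict:

    #include <cassert>
    #include <cmath>
    #include <cstddef>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Inputs keyed by the new CamelCase names from patch 4.
      std::map<std::string, std::vector<float>> inputs = {
          {"Param", {1.0f, 2.0f, 3.0f}},
          {"Grad", {0.1f, 0.2f, 0.3f}},
          {"LearningRate", {0.1f}}};  // one-element tensor, per patch 3

      const float lr = inputs["LearningRate"][0];
      const std::vector<float>& param = inputs["Param"];
      const std::vector<float>& grad = inputs["Grad"];

      // Expected output under the renamed key: ParamOut = Param - lr * Grad.
      std::map<std::string, std::vector<float>> outputs;
      for (size_t i = 0; i < param.size(); ++i) {
        outputs["ParamOut"].push_back(param[i] - lr * grad[i]);
      }

      const std::vector<float> expected = {0.99f, 1.98f, 2.97f};
      for (size_t i = 0; i < expected.size(); ++i) {
        assert(std::fabs(outputs["ParamOut"][i] - expected[i]) < 1e-6f);
      }
      return 0;
    }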