From 82026fe8d952f197ae63964dd70442ede737c18b Mon Sep 17 00:00:00 2001
From: qijun <qijun1994@hotmail.com>
Date: Thu, 10 Aug 2017 12:06:29 +0800
Subject: [PATCH 1/8] remove eigen tensor header file in ddim.h

---
 paddle/framework/ddim.h | 1 -
 1 file changed, 1 deletion(-)
diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h
index 5aa5af0c19..3cb59e1ed2 100644
--- a/paddle/framework/ddim.h
+++ b/paddle/framework/ddim.h
@@ -20,7 +20,6 @@ limitations under the License. */
 #include <vector>
 #include "paddle/framework/dim.h"
 #include "paddle/platform/enforce.h"
-#include "unsupported/Eigen/CXX11/Tensor"
 
 namespace paddle {
 namespace framework {

From f485a9bc501e743b5284132a6c06ad8bc365b065 Mon Sep 17 00:00:00 2001
From: qiaolongfei <qiaolongfei@baidu.com>
Date: Fri, 11 Aug 2017 13:44:39 +0800
Subject: [PATCH 2/8] add auto gradient check design doc

---
 doc/design/auto_gradient_check.md             | 146 ++++++++++++++++++
 .../v2/framework/tests/gradient_checker.py    |  16 +-
 2 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 doc/design/auto_gradient_check.md

diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md
new file mode 100644
index 0000000000..0303d6fbc0
--- /dev/null
+++ b/doc/design/auto_gradient_check.md
@@ -0,0 +1,146 @@
+## auto gradient check Design
+
+## Backgraound：
+- Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right:
+  - **Firstly** you should get the right backpropagation formula according to the forward computation.
+  - **Secondly** you should implement it right in CPP.
+  - **Thirdly** it's difficult to prepare test data.
+
+- Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages:
+  - **Firstly** numeric gradient checker only need forward operator.
+  - **Secondly** user only need to prepare the input data for forward Operator.
+
+## mathematical theory
+The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful.
+
+- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
+- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)
+
+
+## Numeric Gradient Implementation
+### Interface
+```python
+def get_numeric_gradient(op,
+                         input_values,
+                         output_name,
+                         input_to_check,
+                         delta=0.005,
+                         local_scope=None):
+    """
+    Get Numeric Gradient for an operator's input.
+
+    :param op: C++ operator instance, could be a network
+    :param input_values: The input variables. Should be a dictionary, key is
+    variable name. Value is numpy array.
+    :param output_name: The final output variable name.
+    :param input_to_check: The input variable need to get gradient.
+    :param delta: The perturbation value for numeric gradient method. The
+    smaller delta is, the more accurate the result will be. But if delta is
+     too small, it could cause numerical stability problems.
+    :param local_scope: The local scope used for get_numeric_gradient.
+    :return: The gradient array in numpy format.
+    """
+```
+
+### Explaination:
+
+1. Why need `output_name`
+  - One Operator may have multiple Output, you can get independent gradient from each Output. So user should set one output to calculate.
+
+1. Why need `input_to_check`
+  - One operator may have multiple inputs. Gradient Op can calculate the gradient of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times.
+
+
+### Core algorithm implement
+
+
+```python
+    # we only compute gradient of one element each time.
+    # we use a for loop to compute the gradient of every element.
+    for i in xrange(tensor_size):
+        # get one input element through its index i.
+        origin = tensor_to_check.get_float_element(i)
+
+        # add delta to it, run op and then get the sum of the result tensor.
+        x_pos = origin + delta
+        tensor_to_check.set_float_element(i, x_pos)
+        y_pos = get_output()
+
+        # subtract delta from this element, run op and get the sum of the result tensor.
+        x_neg = origin - delta
+        tensor_to_check.set_float_element(i, x_neg)
+        y_neg = get_output()
+
+        # restore old value
+        tensor_to_check.set_float_element(i, origin)
+
+        # compute the gradient of this element and store it into a numpy array.
+        gradient_flat[i] = (y_pos - y_neg) / delta / 2
+
+    # reshape the gradient result to the shape of the source tensor.
+    return gradient_flat.reshape(tensor_to_check.get_dims())
+```
+
+## auto check framework design
+
+Each Operator Kernel has three kinds of Gradient:
+
+- 1. Numeric Gradient
+- 2. CPU Operator Gradient
+- 3. GPU Operator Gradient(if supported)
+
+Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value.
+
+- **Firstly** calculate the numeric gradient.
+- **Secondly** calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient.
+- **Thirdly** calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU)
+
+#### auto check python Interface
+
+```python
+    def check_grad(self,
+                   forward_op,
+                   input_vars,
+                   inputs_to_check,
+                   output_name,
+                   no_grad_set=None,
+                   only_cpu=False,
+                   max_relative_error=0.005):
+        """
+        :param forward_op: used to create backward_op
+        :param input_vars: numpy value of input variable. The following
+            computation will use these variables.
+        :param inputs_to_check: inputs var names that should check gradient.
+        :param output_name: name of the output variable used to compute the gradient.
+        :param max_relative_error: The relative tolerance parameter.
+        :param no_grad_set: used when create backward ops
+        :param only_cpu: only compute and check gradient on cpu kernel.
+        :return:
+        """
+```
+
+### How two check two numpy array is close enough?
+if `abs_numeric_grad` is nearly zero, then use abs error for numeric_grad, not relative
+
+```python
+numeric_grad = ...
+operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor())
+
+abs_numeric_grad = numpy.abs(numeric_grad)
+# if abs_numeric_grad is nearly zero, then use abs error for numeric_grad, not relative
+# error.
+abs_numeric_grad[abs_numeric_grad < 1e-3] = 1
+
+diff_mat = numpy.abs(abs_numeric_grad - operator_grad) / abs_numeric_grad
+max_diff = numpy.max(diff_mat)
+```
+
+
+#### Notes：
+1，The Input data for auto gradient checker should be reasonable to avoid numeric problem.
+
+
+#### refs:
+
+- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
+- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)
diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py
index aacc5e88fe..015e832e82 100644
--- a/python/paddle/v2/framework/tests/gradient_checker.py
+++ b/python/paddle/v2/framework/tests/gradient_checker.py
@@ -73,21 +73,35 @@ def get_numeric_gradient(op,
     def product(dim):
         return reduce(lambda a, b: a * b, dim, 1)
 
+    # get the input tensor whose numeric gradient we want to compute.
     tensor_to_check = local_scope.find_var(input_to_check).get_tensor()
     tensor_size = product(tensor_to_check.get_dims())
+    # prepare a numpy array to store the gradient.
     gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32')
+
+    # we only compute gradient of one element each time.
+    # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
+        # get one input element through its index i.
         origin = tensor_to_check.get_float_element(i)
+
+        # add delta to it, run op and then get the sum of the result tensor.
         x_pos = origin + delta
         tensor_to_check.set_float_element(i, x_pos)
         y_pos = get_output()
 
+        # subtract delta from this element, run op and get the sum of the result tensor.
         x_neg = origin - delta
         tensor_to_check.set_float_element(i, x_neg)
         y_neg = get_output()
 
-        tensor_to_check.set_float_element(i, origin)  # restore old value
+        # restore old value
+        tensor_to_check.set_float_element(i, origin)
+
+        # compute the gradient of this element and store it into a numpy array.
         gradient_flat[i] = (y_pos - y_neg) / delta / 2
+
+    # reshape the gradient result to the shape of the source tensor.
     return gradient_flat.reshape(tensor_to_check.get_dims())
 
 

From e7822dcdc999e8b97d908803926811baf60e67bd Mon Sep 17 00:00:00 2001
From: qiaolongfei <qiaolongfei@baidu.com>
Date: Fri, 11 Aug 2017 15:56:08 +0800
Subject: [PATCH 3/8] Capitalize the first character of some title

---
 doc/design/auto_gradient_check.md | 36 +++++++++++++++----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md
index 0303d6fbc0..1f4d4ec16f 100644
--- a/doc/design/auto_gradient_check.md
+++ b/doc/design/auto_gradient_check.md
@@ -1,16 +1,16 @@
-## auto gradient check Design
+## Auto Gradient Checker Design
 
 ## Backgraound：
 - Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right:
-  - **Firstly** you should get the right backpropagation formula according to the forward computation.
-  - **Secondly** you should implement it right in CPP.
-  - **Thirdly** it's difficult to prepare test data.
+  - 1. you should get the right backpropagation formula according to the forward computation.
+  - 2. you should implement it right in CPP.
+  - 3. it's difficult to prepare test data.
 
 - Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages:
-  - **Firstly** numeric gradient checker only need forward operator.
-  - **Secondly** user only need to prepare the input data for forward Operator.
+  - 1. the numeric gradient checker only needs the forward operator.
+  - 2. the user only needs to prepare the input data for the forward Operator.
 
-## mathematical theory
+## Mathematical Theory
 The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful.
 
 - [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
@@ -18,7 +18,7 @@ The following two document from stanford has a detailed explanation of how to ge
 
 
 ## Numeric Gradient Implementation
-### Interface
+### Python Interface
 ```python
 def get_numeric_gradient(op,
                          input_values,
@@ -44,14 +44,14 @@ def get_numeric_gradient(op,
 
 ### Explaination:
 
-1. Why need `output_name`
+- Why `output_name` is needed
   - One Operator may have multiple Output, you can get independent gradient from each Output. So user should set one output to calculate.
 
-1. Why need `input_to_check`
+- Why `input_to_check` is needed
   - One operator may have multiple inputs. Gradient Op can calculate the gradient of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times.
 
 
-### Core algorithm implement
+### Core Algorithm Implementation
 
 
 ```python
@@ -81,7 +81,7 @@ def get_numeric_gradient(op,
     return gradient_flat.reshape(tensor_to_check.get_dims())
 ```
 
-## auto check framework design
+## Auto Gradient Checker Framework
 
 Each Operator Kernel has three kinds of Gradient:
 
@@ -91,11 +91,11 @@ Each Operator Kernel has three kinds of Gradient:
 
 Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value.
 
-- **Firstly** calculate the numeric gradient.
-- **Secondly** calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient.
-- **Thirdly** calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU)
+- 1. calculate the numeric gradient.
+- 2. calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient.
+- 3. calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient (if GPU is supported).
 
-#### auto check python Interface
+#### Python Interface
 
 ```python
     def check_grad(self,
@@ -119,7 +119,7 @@ Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as
         """
 ```
 
-### How two check two numpy array is close enough?
+### How to check if two numpy arrays are close enough?
 if `abs_numeric_grad` is nearly zero, then use abs error for numeric_grad, not relative
 
 ```python
@@ -140,7 +140,7 @@ max_diff = numpy.max(diff_mat)
 1，The Input data for auto gradient checker should be reasonable to avoid numeric problem.
 
 
-#### refs:
+#### Refs:
 
 - [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization)
 - [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96)

From 0dc9c6c41ac8f2df31a1c0cc5e9a5a3abeb96bd8 Mon Sep 17 00:00:00 2001
From: Luo Tao <luotao02@baidu.com>
Date: Fri, 11 Aug 2017 16:14:24 +0800
Subject: [PATCH 4/8] auto update the requirements in .travis.yml with
 python/setup.py.in

---
 .travis.yml             |  4 ++--
 python/requirements.txt |  9 +++++++++
 python/setup.py.in      | 12 ++----------
 3 files changed, 13 insertions(+), 12 deletions(-)
 create mode 100644 python/requirements.txt

diff --git a/.travis.yml b/.travis.yml
index 8c8c6699d3..b4b83fcdbc 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -37,8 +37,8 @@ before_install:
   - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
   # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
   # protobuf version.
-  - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
-  - pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow
+  - pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt
+  - pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker
   - curl https://glide.sh/get | bash
   - eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
   - go get -u github.com/alecthomas/gometalinter
diff --git a/python/requirements.txt b/python/requirements.txt
new file mode 100644
index 0000000000..3df822bd76
--- /dev/null
+++ b/python/requirements.txt
@@ -0,0 +1,9 @@
+requests==2.9.2
+numpy>=1.12
+protobuf==3.1
+recordio
+matplotlib
+rarfile
+scipy>=0.19.0
+Pillow
+nltk>=3.2.2
diff --git a/python/setup.py.in b/python/setup.py.in
index 4110c98318..38728aa2fd 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -1,5 +1,4 @@
 from setuptools import setup, Distribution
-
 class BinaryDistribution(Distribution):
     def has_ext_modules(foo):
         return True
@@ -18,15 +17,8 @@ packages=['paddle',
           'paddle.v2.framework.proto',
           'py_paddle']
 
-setup_requires=["requests",
-                "numpy>=1.12",
-                "protobuf==3.1",
-                "recordio",
-                "matplotlib",
-                "rarfile",
-                "scipy>=0.19.0",
-                "Pillow",
-                "nltk>=3.2.2"]
+with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
+    setup_requires = f.read().splitlines()
 
 if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
     setup_requires+=["opencv-python"]

From 8e0bf6d9337b3a615c0203639f0a6755c51dfd6e Mon Sep 17 00:00:00 2001
From: Yi Wang <yi.wang.2005@gmail.com>
Date: Fri, 11 Aug 2017 13:45:51 -0700
Subject: [PATCH 5/8] Update

---
 paddle/framework/grad_op_builder.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc
index 6d032fb78f..0121d99961 100644
--- a/paddle/framework/grad_op_builder.cc
+++ b/paddle/framework/grad_op_builder.cc
@@ -76,7 +76,7 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op,
 }
 
 OperatorBase* BuildGradOp(const OperatorBase* op) {
-  std::string grad_op_type = OpRegistry::grad_ops().at(op->type_);
+  const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type());
   OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)();
   grad_op->type_ = grad_op_type;
   grad_op->attrs_ = op->attrs_;

From 717fe5495e413eef0852dbd01689385d263aa256 Mon Sep 17 00:00:00 2001
From: Yi Wang <yi.wang.2005@gmail.com>
Date: Fri, 11 Aug 2017 15:02:25 -0700
Subject: [PATCH 6/8] Update grad_op_builder.cc

---
 paddle/framework/grad_op_builder.cc | 83 ++++++++++++++++-------------
 1 file changed, 47 insertions(+), 36 deletions(-)

diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc
index 0121d99961..cbfc1bfab0 100644
--- a/paddle/framework/grad_op_builder.cc
+++ b/paddle/framework/grad_op_builder.cc
@@ -19,45 +19,46 @@ permissions and limitations under the License. */
 namespace paddle {
 namespace framework {
 
-class OpRegistry;
-
 using VarIndexMap = std::unordered_map<std::string, int>;
 
+typedef std::vector<int> Ints;
+
 enum class OpArgType { IN, OUT };
 
-static std::vector<int>* GetOpFormat(OperatorBase* op, const OpArgType& type) {
-  std::string key = type == OpArgType::IN ? "input_format" : "output_format";
-  return op->attrs_.count(key)
-             ? &boost::get<std::vector<int>>(op->attrs_.at(key))
-             : nullptr;
+const Ints* AttrFormat(const AttributeMap& attrs, const std::string& key) {
+  return (attrs.count(key) > 0) ? &boost::get<Ints>(attrs.at(key)) : nullptr;
 }
 
-static const std::vector<int>* GetOpFormat(const OperatorBase* op,
-                                           const OpArgType& type) {
-  std::string key = type == OpArgType::IN ? "input_format" : "output_format";
-  return op->attrs_.count(key)
-             ? &boost::get<std::vector<int>>(op->attrs_.at(key))
-             : nullptr;
+Ints* AttrFormat(AttributeMap& attrs, const std::string& key) {
+  return (attrs.count(key) > 0) ? &boost::get<Ints>(attrs.at(key)) : nullptr;
 }
 
-static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op,
-                       const OpArgType& src_type, const OpArgType& dst_type,
+static void TransOpArg(const OperatorBase* src_op,
+                       std::vector<std::string>& grad_inputs,
+                       std::vector<std::string>& grad_outputs,
+                       AttributeMap& grad_attrs,
+                       std::unordered_map<std::string, int>& grad_idxs,
+                       const std::string& src_type, const std::string& dst_type,
                        int& idx, bool is_grad) {
   const std::vector<std::string>& src_inout =
-      src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_;
-  const std::vector<int>* src_format = GetOpFormat(src_op, src_type);
+      (src_type == "input_format") ? src_op->inputs_ : src_op->outputs_;
+
+  const std::vector<int>* src_format = AttrFormat(src_op->Attrs(), src_type);
 
   std::vector<std::string>& dst_inout =
-      dst_type == OpArgType::IN ? dst_op->inputs_ : dst_op->outputs_;
-  std::vector<int>* dst_format = GetOpFormat(dst_op, dst_type);
+      (dst_type == "input_format") ? grad_inputs : grad_outputs;
+
+  std::vector<int>* dst_format = AttrFormat(grad_attrs, dst_type);
+
   const OpProto& proto = OpRegistry::protos().at(src_op->type_);
+
   const auto& src_arg_list =
-      src_type == OpArgType::IN ? proto.inputs() : proto.outputs();
+      (src_type == "input_format") ? proto.inputs() : proto.outputs();
 
   for (const auto& arg : src_arg_list) {
     std::string src_name = arg.name();
     std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name;
-    (*dst_op->in_out_idxs_)[dst_name] = idx++;
+    grad_idxs[dst_name] = idx++;
     int src_arg_idx = src_op->in_out_idxs_->at(src_name);
     int src_begin =
         src_format == nullptr ? src_arg_idx : src_format->at(src_arg_idx);
@@ -77,25 +78,35 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op,
 
 OperatorBase* BuildGradOp(const OperatorBase* op) {
   const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type());
-  OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)();
-  grad_op->type_ = grad_op_type;
-  grad_op->attrs_ = op->attrs_;
-  grad_op->attrs_.erase("input_format");
-  grad_op->attrs_.erase("output_format");
-  if (GetOpFormat(op, OpArgType::IN) != nullptr) {
-    grad_op->attrs_["output_format"] = std::vector<int>({0});
+
+  AttributeMap grad_attrs(op->Attrs());
+  grad_attrs.erase("input_format");
+  grad_attrs.erase("output_format");
+  if (op->Attrs().count("input_format") > 0) {
+    grad_attrs["output_format"] = std::vector<int>({0});
   }
-  if (GetOpFormat(op, OpArgType::IN) != nullptr ||
-      GetOpFormat(op, OpArgType::OUT) != nullptr) {
-    grad_op->attrs_["input_format"] = std::vector<int>({0});
+  if (op->Attrs().count("input_format") > 0 ||
+      op->Attrs().count("output_format") > 0) {
+    grad_attrs["input_format"] = std::vector<int>({0});
   }
-  grad_op->in_out_idxs_.reset(new VarIndexMap());
+
+  std::vector<std::string> grad_inputs, grad_outputs;
+  std::unordered_map<std::string, int> grad_idxs;
   int in_idx = 0;
   int out_idx = 0;
-  TransOpArg(op, grad_op, OpArgType::IN, OpArgType::IN, in_idx, false);   // I
-  TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, false);  // G
-  TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, true);   // OG
-  TransOpArg(op, grad_op, OpArgType::IN, OpArgType::OUT, out_idx, true);  // IG
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+             "input_format", "input_format", in_idx, false);  // I
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+             "output_format", "input_format", in_idx, false);  // G
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+             "output_format", "input_format", in_idx, true);  // OG
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+             "input_format", "output_format", out_idx, true);  // IG
+
+  OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)();
+
+  // TODO(yi): Set data member of grad_op.
+
   return grad_op;
 }
 

From 5381a6eef8f1313c46105fe019a60eb753e0b75c Mon Sep 17 00:00:00 2001
From: Yi Wang <yi.wang.2005@gmail.com>
Date: Fri, 11 Aug 2017 15:08:57 -0700
Subject: [PATCH 7/8] Update

---
 paddle/framework/grad_op_builder.cc | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc
index cbfc1bfab0..8bd2bc5902 100644
--- a/paddle/framework/grad_op_builder.cc
+++ b/paddle/framework/grad_op_builder.cc
@@ -19,8 +19,6 @@ permissions and limitations under the License. */
 namespace paddle {
 namespace framework {
 
-using VarIndexMap = std::unordered_map<std::string, int>;
-
 typedef std::vector<int> Ints;
 
 enum class OpArgType { IN, OUT };
@@ -91,21 +89,27 @@ OperatorBase* BuildGradOp(const OperatorBase* op) {
   }
 
   std::vector<std::string> grad_inputs, grad_outputs;
-  std::unordered_map<std::string, int> grad_idxs;
+
+  using VarIndexMap = std::unordered_map<std::string, int>;
+  VarIndexMap* grad_idxs = new VarIndexMap;
   int in_idx = 0;
   int out_idx = 0;
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
              "input_format", "input_format", in_idx, false);  // I
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
              "output_format", "input_format", in_idx, false);  // G
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
              "output_format", "input_format", in_idx, true);  // OG
-  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs,
+  TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs,
              "input_format", "output_format", out_idx, true);  // IG
 
   OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)();
 
-  // TODO(yi): Set data member of grad_op.
+  grad_op->type_ = grad_op_type;
+  grad_op->inputs_ = grad_inputs;
+  grad_op->outputs_ = grad_outputs;
+  grad_op->attrs_ = grad_attrs;
+  grad_op->in_out_idxs_.reset(grad_idxs);
 
   return grad_op;
 }

From 37c2a23884524e6cf76b83eb981638f58d30d22d Mon Sep 17 00:00:00 2001
From: Helin Wang <ustc.harry@gmail.com>
Date: Fri, 11 Aug 2017 22:12:44 +0000
Subject: [PATCH 8/8] fix cpplint error

---
 paddle/trainer/NewRemoteParameterUpdater.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp
index cccb7e7cdd..35dcb235e7 100644
--- a/paddle/trainer/NewRemoteParameterUpdater.cpp
+++ b/paddle/trainer/NewRemoteParameterUpdater.cpp
@@ -68,7 +68,7 @@ void NewRemoteParameterUpdater::init(
     LOG(INFO) << "paddle_begin_init_params start";
     // NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig.
     // This makes golang pserver compatible with handy V1 demos.
-    // TODO: Refine or remove these ugly converting lines
+    // TODO(wuyi): Refine or remove these ugly converting lines
     OptimizerConfig optimizerConfigV2;
     if (trainerConfig_.learning_method() == "momentum") {
       optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);