From 82026fe8d952f197ae63964dd70442ede737c18b Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 12:06:29 +0800 Subject: [PATCH 1/8] remove eigen tensor header file in dddim.h --- paddle/framework/ddim.h | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 5aa5af0c19..3cb59e1ed2 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -20,7 +20,6 @@ limitations under the License. */ #include #include "paddle/framework/dim.h" #include "paddle/platform/enforce.h" -#include "unsupported/Eigen/CXX11/Tensor" namespace paddle { namespace framework { From f485a9bc501e743b5284132a6c06ad8bc365b065 Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 11 Aug 2017 13:44:39 +0800 Subject: [PATCH 2/8] add auto gradient check design doc --- doc/design/auto_gradient_check.md | 146 ++++++++++++++++++ .../v2/framework/tests/gradient_checker.py | 16 +- 2 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 doc/design/auto_gradient_check.md diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md new file mode 100644 index 0000000000..0303d6fbc0 --- /dev/null +++ b/doc/design/auto_gradient_check.md @@ -0,0 +1,146 @@ +## auto gradient check Design + +## Backgraound: +- Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right: + - **Firstly** you should get the right backpropagation formula according to the forward computation. + - **Secondly** you should implement it right in CPP. + - **Thirdly** it's difficult to prepare test data. + +- Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages: + - **Firstly** numeric gradient checker only need forward operator. + - **Secondly** user only need to prepare the input data for forward Operator. 
+ +## mathematical theory +The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful. + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) + + +## Numeric Gradient Implementation +### Interface +```python +def get_numeric_gradient(op, + input_values, + output_name, + input_to_check, + delta=0.005, + local_scope=None): + """ + Get Numeric Gradient for an operator's input. + + :param op: C++ operator instance, could be a network + :param input_values: The input variables. Should be a dictionary, key is + variable name. Value is numpy array. + :param output_name: The final output variable name. + :param input_to_check: The input variable whose gradient is needed. + :param delta: The perturbation value for numeric gradient method. The + smaller delta is, the more accurate the result will be. But if that delta is + too small, it could cause numerical stability problems. + :param local_scope: The local scope used for get_numeric_gradient. + :return: The gradient array in numpy format. + """ +``` + +### Explaination: + +1. Why need `output_name` + - One Operator may have multiple Output, you can get independent gradient from each Output. So user should set one output to calculate. + +1. Why need `input_to_check` + - One operator may have multiple inputs. Gradient Op can calculate the gradient of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times. + + +### Core algorithm implement + + +```python + # we only compute gradient of one element each time.
+ # we use a for loop to compute the gradient of every element. + for i in xrange(tensor_size): + # get one input element through its index i. + origin = tensor_to_check.get_float_element(i) + + # add delta to it, run op and then get the sum of the result tensor. + x_pos = origin + delta + tensor_to_check.set_float_element(i, x_pos) + y_pos = get_output() + + # subtract delta from this element, run op and get the sum of the result tensor. + x_neg = origin - delta + tensor_to_check.set_float_element(i, x_neg) + y_neg = get_output() + + # restore old value + tensor_to_check.set_float_element(i, origin) + + # compute the gradient of this element and store it into a numpy array. + gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + # reshape the gradient result to the shape of the source tensor. + return gradient_flat.reshape(tensor_to_check.get_dims()) +``` + +## auto check framework design + +Each Operator Kernel has three kinds of Gradient: + +- 1. Numeric Gradient +- 2. CPU Operator Gradient +- 3. GPU Operator Gradient(if supported) + +Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value. + +- **Firstly** calculate the numeric gradient. +- **Secondly** calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient. +- **Thirdly** calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU) + +#### auto check python Interface + +```python + def check_grad(self, + forward_op, + input_vars, + inputs_to_check, + output_name, + no_grad_set=None, + only_cpu=False, + max_relative_error=0.005): + """ + :param forward_op: used to create backward_op + :param input_vars: numpy value of input variable. The following + computation will use these variables. + :param inputs_to_check: inputs var names that should check gradient. + :param output_name: name of the output variable that is used to calculate the gradient. + :param max_relative_error: The relative tolerance parameter.
+ :param no_grad_set: used when creating backward ops + :param only_cpu: only compute and check gradient on cpu kernel. + :return: + """ +``` + +### How two check two numpy array is close enough? +if `abs_numeric_grad` is nearly zero, then use abs error for numeric_grad, not relative + +```python +numeric_grad = ... +operator_grad = numpy.array(scope.find_var(grad_var_name(name)).get_tensor()) + +abs_numeric_grad = numpy.abs(numeric_grad) +# if abs_numeric_grad is nearly zero, then use abs error for numeric_grad, not relative +# error. +abs_numeric_grad[abs_numeric_grad < 1e-3] = 1 + +diff_mat = numpy.abs(numeric_grad - operator_grad) / abs_numeric_grad +max_diff = numpy.max(diff_mat) +``` + + +#### Notes: +1,The Input data for auto gradient checker should be reasonable to avoid numeric problem. + + +#### refs: + +- [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) +- [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index aacc5e88fe..015e832e82 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -73,21 +73,35 @@ def get_numeric_gradient(op, def product(dim): return reduce(lambda a, b: a * b, dim, 1) + # get the input tensor whose numeric gradient we want to compute. tensor_to_check = local_scope.find_var(input_to_check).get_tensor() tensor_size = product(tensor_to_check.get_dims()) + # prepare a numpy array to store the gradient. gradient_flat = numpy.zeros(shape=(tensor_size, ), dtype='float32') + + # we only compute gradient of one element each time. + # we use a for loop to compute the gradient of every element.
 for i in xrange(tensor_size): + # get one input element through its index i. origin = tensor_to_check.get_float_element(i) + + # add delta to it, run op and then get the sum of the result tensor. x_pos = origin + delta tensor_to_check.set_float_element(i, x_pos) y_pos = get_output() + # subtract delta from this element, run op and get the sum of the result tensor. x_neg = origin - delta tensor_to_check.set_float_element(i, x_neg) y_neg = get_output() - tensor_to_check.set_float_element(i, origin) # restore old value + # restore old value + tensor_to_check.set_float_element(i, origin) + + # compute the gradient of this element and store it into a numpy array. gradient_flat[i] = (y_pos - y_neg) / delta / 2 + + # reshape the gradient result to the shape of the source tensor. return gradient_flat.reshape(tensor_to_check.get_dims()) From e7822dcdc999e8b97d908803926811baf60e67bd Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Fri, 11 Aug 2017 15:56:08 +0800 Subject: [PATCH 3/8] Capitalize the first character of some title --- doc/design/auto_gradient_check.md | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/doc/design/auto_gradient_check.md b/doc/design/auto_gradient_check.md index 0303d6fbc0..1f4d4ec16f 100644 --- a/doc/design/auto_gradient_check.md +++ b/doc/design/auto_gradient_check.md @@ -1,16 +1,16 @@ -## auto gradient check Design +## Auto Gradient Checker Design ## Backgraound: - Operator forward computing is easy to check if the result is right because it has a clear definition. **But** backpropagation is a notoriously difficult algorithm to debug and get right: - - **Firstly** you should get the right backpropagation formula according to the forward computation. - - **Secondly** you should implement it right in CPP. - - **Thirdly** it's difficult to prepare test data. + - 1. you should get the right backpropagation formula according to the forward computation. + - 2. you should implement it right in CPP. + - 3.
it's difficult to prepare test data. - Auto gradient check gets a numeric gradient by forward Operator and use it as a reference of the backward Operator's result. It has several advantages: - - **Firstly** numeric gradient checker only need forward operator. - - **Secondly** user only need to prepare the input data for forward Operator. + - 1. numeric gradient checker only need forward operator. + - 2. user only need to prepare the input data for forward Operator. -## mathematical theory +## Mathematical Theory The following two document from stanford has a detailed explanation of how to get numeric gradient and why it's useful. - [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) @@ -18,7 +18,7 @@ The following two document from stanford has a detailed explanation of how to ge ## Numeric Gradient Implementation -### Interface +### Python Interface ```python def get_numeric_gradient(op, input_values, @@ -44,14 +44,14 @@ def get_numeric_gradient(op, ### Explaination: -1. Why need `output_name` +- Why need `output_name` - One Operator may have multiple Output, you can get independent gradient from each Output. So user should set one output to calculate. -1. Why need `input_to_check` +- Why need `input_to_check` - One operator may have multiple inputs. Gradient Op can calculate the gradient of these Inputs at the same time. But Numeric Gradient needs to calculate them one by one. So `get_numeric_gradient` is designed to calculate the gradient for one input. If you need to compute multiple inputs, you can call `get_numeric_gradient` multiple times. 
-### Core algorithm implement +### Core Algorithm Implementation ```python @@ -81,7 +81,7 @@ def get_numeric_gradient(op, return gradient_flat.reshape(tensor_to_check.get_dims()) ``` -## auto check framework design +## Auto Gradient Checker Framework Each Operator Kernel has three kinds of Gradient: @@ -91,11 +91,11 @@ Each Operator Kernel has three kinds of Gradient: Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as the reference value. -- **Firstly** calculate the numeric gradient. -- **Secondly** calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient. -- **Thirdly** calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient.(if support GPU) +- 1. calculate the numeric gradient. +- 2. calculate CPU kernel Gradient with the backward Operator and compare it with the numeric gradient. +- 3. calculate GPU kernel Gradient with the backward Operator and compare it with the numeric gradient (if GPU is supported). -#### auto check python Interface +#### Python Interface ```python def check_grad(self, @@ -119,7 +119,7 @@ Numeric Gradient Only relies on forward Operator. So we use Numeric Gradient as """ ``` -### How two check two numpy array is close enough? +### How to check if two numpy arrays are close enough? if `abs_numeric_grad` is nearly zero, then use abs error for numeric_grad, not relative ```python @@ -140,7 +140,7 @@ max_diff = numpy.max(diff_mat) 1,The Input data for auto gradient checker should be reasonable to avoid numeric problem.
-#### refs: +#### Refs: - [Gradient checking and advanced optimization(en)](http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization) - [Gradient checking and advanced optimization(cn)](http://ufldl.stanford.edu/wiki/index.php/%E6%A2%AF%E5%BA%A6%E6%A3%80%E9%AA%8C%E4%B8%8E%E9%AB%98%E7%BA%A7%E4%BC%98%E5%8C%96) From 0dc9c6c41ac8f2df31a1c0cc5e9a5a3abeb96bd8 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Fri, 11 Aug 2017 16:14:24 +0800 Subject: [PATCH 4/8] auto update the requirements in .travis.yml with python/setup.py.in --- .travis.yml | 4 ++-- python/requirements.txt | 9 +++++++++ python/setup.py.in | 12 ++---------- 3 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 python/requirements.txt diff --git a/.travis.yml b/.travis.yml index 8c8c6699d3..b4b83fcdbc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,8 +37,8 @@ before_install: - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. 
- - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker - - pip install rarfile nltk==3.2.2 scipy==0.19.0 recordio matplotlib Pillow + - pip install -r $TRAVIS_BUILD_DIR/python/requirements.txt + - pip install wheel sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit LinkChecker - curl https://glide.sh/get | bash - eval "$(GIMME_GO_VERSION=1.8.3 gimme)" - go get -u github.com/alecthomas/gometalinter diff --git a/python/requirements.txt b/python/requirements.txt new file mode 100644 index 0000000000..3df822bd76 --- /dev/null +++ b/python/requirements.txt @@ -0,0 +1,9 @@ +requests==2.9.2 +numpy>=1.12 +protobuf==3.1 +recordio +matplotlib +rarfile +scipy>=0.19.0 +Pillow +nltk>=3.2.2 diff --git a/python/setup.py.in b/python/setup.py.in index 4110c98318..38728aa2fd 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,5 +1,4 @@ from setuptools import setup, Distribution - class BinaryDistribution(Distribution): def has_ext_modules(foo): return True @@ -18,15 +17,8 @@ packages=['paddle', 'paddle.v2.framework.proto', 'py_paddle'] -setup_requires=["requests", - "numpy>=1.12", - "protobuf==3.1", - "recordio", - "matplotlib", - "rarfile", - "scipy>=0.19.0", - "Pillow", - "nltk>=3.2.2"] +with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: + setup_requires = f.read().splitlines() if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: setup_requires+=["opencv-python"] From 8e0bf6d9337b3a615c0203639f0a6755c51dfd6e Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 13:45:51 -0700 Subject: [PATCH 5/8] Update --- paddle/framework/grad_op_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 6d032fb78f..0121d99961 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -76,7 +76,7 @@ static 
void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, } OperatorBase* BuildGradOp(const OperatorBase* op) { - std::string grad_op_type = OpRegistry::grad_ops().at(op->type_); + const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type()); OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); grad_op->type_ = grad_op_type; grad_op->attrs_ = op->attrs_; From 717fe5495e413eef0852dbd01689385d263aa256 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 15:02:25 -0700 Subject: [PATCH 6/8] UPdate grad_op_builder.cc --- paddle/framework/grad_op_builder.cc | 83 ++++++++++++++++------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index 0121d99961..cbfc1bfab0 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -19,45 +19,46 @@ permissions and limitations under the License. */ namespace paddle { namespace framework { -class OpRegistry; - using VarIndexMap = std::unordered_map; +typedef std::vector Ints; + enum class OpArgType { IN, OUT }; -static std::vector* GetOpFormat(OperatorBase* op, const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; +const Ints* AttrFormat(const AttributeMap& attrs, const std::string& key) { + return (attrs.count(key) > 0) ? &boost::get(attrs.at(key)) : nullptr; } -static const std::vector* GetOpFormat(const OperatorBase* op, - const OpArgType& type) { - std::string key = type == OpArgType::IN ? "input_format" : "output_format"; - return op->attrs_.count(key) - ? &boost::get>(op->attrs_.at(key)) - : nullptr; +Ints* AttrFormat(AttributeMap& attrs, const std::string& key) { + return (attrs.count(key) > 0) ? 
&boost::get(attrs.at(key)) : nullptr; } -static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, - const OpArgType& src_type, const OpArgType& dst_type, +static void TransOpArg(const OperatorBase* src_op, + std::vector& grad_inputs, + std::vector& grad_outputs, + AttributeMap& grad_attrs, + std::unordered_map& grad_idxs, + const std::string& src_type, const std::string& dst_type, int& idx, bool is_grad) { const std::vector& src_inout = - src_type == OpArgType::IN ? src_op->inputs_ : src_op->outputs_; - const std::vector* src_format = GetOpFormat(src_op, src_type); + (src_type == "input_format") ? src_op->inputs_ : src_op->outputs_; + + const std::vector* src_format = AttrFormat(src_op->Attrs(), src_type); std::vector& dst_inout = - dst_type == OpArgType::IN ? dst_op->inputs_ : dst_op->outputs_; - std::vector* dst_format = GetOpFormat(dst_op, dst_type); + (dst_type == "input_format") ? grad_inputs : grad_outputs; + + std::vector* dst_format = AttrFormat(grad_attrs, dst_type); + const OpProto& proto = OpRegistry::protos().at(src_op->type_); + const auto& src_arg_list = - src_type == OpArgType::IN ? proto.inputs() : proto.outputs(); + (src_type == "input_format") ? proto.inputs() : proto.outputs(); for (const auto& arg : src_arg_list) { std::string src_name = arg.name(); std::string dst_name = is_grad ? src_name + kGradVarSuffix : src_name; - (*dst_op->in_out_idxs_)[dst_name] = idx++; + grad_idxs[dst_name] = idx++; int src_arg_idx = src_op->in_out_idxs_->at(src_name); int src_begin = src_format == nullptr ? 
src_arg_idx : src_format->at(src_arg_idx); @@ -77,25 +78,35 @@ static void TransOpArg(const OperatorBase* src_op, OperatorBase* dst_op, OperatorBase* BuildGradOp(const OperatorBase* op) { const std::string& grad_op_type = OpRegistry::grad_ops().at(op->Type()); - OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - grad_op->type_ = grad_op_type; - grad_op->attrs_ = op->attrs_; - grad_op->attrs_.erase("input_format"); - grad_op->attrs_.erase("output_format"); - if (GetOpFormat(op, OpArgType::IN) != nullptr) { - grad_op->attrs_["output_format"] = std::vector({0}); + + AttributeMap grad_attrs(op->Attrs()); + grad_attrs.erase("input_format"); + grad_attrs.erase("output_format"); + if (op->Attrs().count("input_format") > 0) { + grad_attrs["output_format"] = std::vector({0}); } - if (GetOpFormat(op, OpArgType::IN) != nullptr || - GetOpFormat(op, OpArgType::OUT) != nullptr) { - grad_op->attrs_["input_format"] = std::vector({0}); + if (op->Attrs().count("input_format") > 0 || + op->Attrs().count("output_format") > 0) { + grad_attrs["input_format"] = std::vector({0}); } - grad_op->in_out_idxs_.reset(new VarIndexMap()); + + std::vector grad_inputs, grad_outputs; + std::unordered_map grad_idxs; int in_idx = 0; int out_idx = 0; - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::IN, in_idx, false); // I - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, false); // G - TransOpArg(op, grad_op, OpArgType::OUT, OpArgType::IN, in_idx, true); // OG - TransOpArg(op, grad_op, OpArgType::IN, OpArgType::OUT, out_idx, true); // IG + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + "input_format", "input_format", in_idx, false); // I + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + "output_format", "input_format", in_idx, false); // G + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + "output_format", "input_format", in_idx, true); // OG + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, 
grad_idxs, + "input_format", "output_format", out_idx, true); // IG + + OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); + + // TODO(yi): Set data member of grad_op. + return grad_op; } From 5381a6eef8f1313c46105fe019a60eb753e0b75c Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Fri, 11 Aug 2017 15:08:57 -0700 Subject: [PATCH 7/8] Update --- paddle/framework/grad_op_builder.cc | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/paddle/framework/grad_op_builder.cc b/paddle/framework/grad_op_builder.cc index cbfc1bfab0..8bd2bc5902 100644 --- a/paddle/framework/grad_op_builder.cc +++ b/paddle/framework/grad_op_builder.cc @@ -19,8 +19,6 @@ permissions and limitations under the License. */ namespace paddle { namespace framework { -using VarIndexMap = std::unordered_map; - typedef std::vector Ints; enum class OpArgType { IN, OUT }; @@ -91,21 +89,27 @@ OperatorBase* BuildGradOp(const OperatorBase* op) { } std::vector grad_inputs, grad_outputs; - std::unordered_map grad_idxs; + + using VarIndexMap = std::unordered_map; + VarIndexMap* grad_idxs = new VarIndexMap; int in_idx = 0; int out_idx = 0; - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "input_format", "input_format", in_idx, false); // I - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "output_format", "input_format", in_idx, false); // G - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "output_format", "input_format", in_idx, true); // OG - TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, grad_idxs, + TransOpArg(op, grad_inputs, grad_outputs, grad_attrs, *grad_idxs, "input_format", "output_format", out_idx, true); // IG OperatorBase* grad_op = OpRegistry::op_creators().at(grad_op_type)(); - // TODO(yi): 
Set data member of grad_op. + grad_op->type_ = grad_op_type; + grad_op->inputs_ = grad_inputs; + grad_op->outputs_ = grad_outputs; + grad_op->attrs_ = grad_attrs; + grad_op->in_out_idxs_.reset(grad_idxs); return grad_op; } From 37c2a23884524e6cf76b83eb981638f58d30d22d Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 11 Aug 2017 22:12:44 +0000 Subject: [PATCH 8/8] fix cpplint error --- paddle/trainer/NewRemoteParameterUpdater.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/trainer/NewRemoteParameterUpdater.cpp index cccb7e7cdd..35dcb235e7 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -68,7 +68,7 @@ void NewRemoteParameterUpdater::init( LOG(INFO) << "paddle_begin_init_params start"; // NOTE: convert V1 OptimizatioinConfig proto to V2 OptimizerConfig. // This makes golang pserver compatible with handy V1 demos. - // TODO: Refine or remove these ugly converting lines + // TODO(wuyi): Refine or remove these ugly converting lines OptimizerConfig optimizerConfigV2; if (trainerConfig_.learning_method() == "momentum") { optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD);