From bad3d4b661fca8ae74ab45ff980590e4709a71a9 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Mon, 25 Dec 2017 19:21:13 +0800 Subject: [PATCH 01/15] Grad Check For RNN --- .../operators/tensor_array_read_write_op.cc | 11 + paddle/operators/while_op.cc | 15 +- .../fluid/tests/test_dynrnn_gradient_check.py | 215 ++++++++++++++++++ 3 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 2ee9bf700c..59a4dac940 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -136,6 +136,17 @@ class ReadFromArrayOp : public ArrayOp { auto &dev_ctx = *pool.Borrow(place); framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor); out_tensor->set_lod(x_array[offset].lod()); + if (Input("X") == "dynamic_rnn_0_output_array_fc_0.tmp_0_0@GRAD") { + VLOG(10) << "Offset = " << offset; + if (x_array[offset].numel() != 0) { + auto d = x_array[offset].dims(); + std::ostringstream sout; + for (int64_t i = 0; i < d[0]; ++i) { + sout << x_array[offset].data()[0 * d[1]] << ", "; + } + VLOG(10) << "Grad = " << sout.str(); + } + } } else { VLOG(10) << "offset " << offset << " >= " << x_array.size(); } diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 11ee96faad..d7c34297cd 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -129,6 +129,9 @@ class WhileGradOp : public framework::OperatorBase { auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name), "Cannot find inside gradient %s", inside_og_name); + + VLOG(10) << "OG " << outside_og_name << " Type is " + << og_outside.Type().name(); if (og_outside.Type().hash_code() == typeid(framework::LoDTensor).hash_code()) { auto &outside_tensor = og_outside.Get(); @@ -145,7 +148,6 @@ class WhileGradOp : public framework::OperatorBase { inside_array.resize(outside_array.size()); for (size_t j = 0; j < inside_array.size(); ++j) { - VLOG(10) << j << " " << outside_array[j].numel(); if (outside_array[j].numel() != 0) { inside_array[j].set_lod(outside_array[j].lod()); inside_array[j].ShareDataWith(outside_array[j]); @@ -198,6 +200,17 @@ class WhileGradOp : public framework::OperatorBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); + + VLOG(10) << "Accumulate the gradient of " << pg_names[param_id]; + + if (pg_names[param_id] == "W@GRAD") { + auto &w_g = detail::Ref(cur_scope.FindVar(new_inside_name)) + .Get(); + VLOG(10) << "W_G is" << w_g.data()[0]; + } else { + VLOG(10) << pg_names[param_id]; + } + sum_op->Run(cur_scope, dev_place); cur_scope.Rename(new_inside_name, inside_grad_name); } diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py new file mode 100644 index 0000000000..99b9285466 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -0,0 +1,215 @@ +import numpy +import random +import collections +import paddle.v2.fluid as fluid +import unittest +import copy + + +class Memory(object): + def __init__(self, shape, dtype='float32'): + self.ex = numpy.zeros(shape=shape, dtype=dtype) + self.cur = None + + def update(self, val): + assert val.shape == self.ex.shape + assert val.dtype == self.ex.dtype + self.cur = val + + def ex(self): + return self.ex + + 
def next(self): + self.ex = self.cur + self.cur = None + + def __next__(self): + self.next() + + def reset(self): + self.ex = numpy.zeros(shape=self.ex.shape, dtype=self.ex.dtype) + self.cur = None + + +class Output(object): + def __init__(self): + self.outs = [] + + def next_sequence(self): + self.outs.append([]) + + def out(self, val): + self.outs[-1].append(val) + + def last(self): + return self.outs[-1][-1] + + +class BaseRNN(object): + def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15): + self.num_seq = num_seq + self.inputs = collections.defaultdict(list) + + for _ in xrange(num_seq): + seq_len = random.randint(1, max_seq_len - 1) + for iname in ins: + ishape = ins[iname].get('shape', None) + idtype = ins[iname].get('dtype', 'float32') + lst = [] + for _ in xrange(seq_len): + lst.append(numpy.random.random(size=ishape).astype(idtype)) + self.inputs[iname].append(lst) + + self.mems = dict() + for mname in mems: + mshape = mems[mname].get('shape', None) + mdtype = mems[mname].get('dtype', 'float32') + self.mems[mname] = Memory(shape=mshape, dtype=mdtype) + + self.params = dict() + for pname in params: + pshape = params[pname].get('shape', None) + pdtype = params[pname].get('dtype', 'float32') + self.params[pname] = numpy.random.random(size=pshape).astype(pdtype) + + self.outputs = dict() + + for oname in outs: + self.outputs[oname] = Output() + + def step(self, **kwargs): + pass + + def exe(self): + retv = dict() + for out in self.outputs: + retv[out] = [] + + for seq_id in xrange(self.num_seq): + for mname in self.mems: + self.mems[mname].reset() + for out in self.outputs: + self.outputs[out].next_sequence() + + iname0 = self.inputs.keys()[0] + seq_len = len(self.inputs[iname0][seq_id]) + + for step_id in xrange(seq_len): + xargs = dict() + + for iname in self.inputs: + xargs[iname] = self.inputs[iname][seq_id][step_id] + + for mname in self.mems: + xargs[mname] = self.mems[mname] + + for pname in self.params: + xargs[pname] = self.params[pname] + + for out in self.outputs: + xargs[out] = self.outputs[out] + + self.step(**xargs) + + for mname in self.mems: + next(self.mems[mname]) + + for out in self.outputs: + retv[out].append(self.outputs[out].last()) + + for out in retv: + retv[out] = numpy.array(retv[out]) + return retv + + def to_feed(self, place): + feed_dict = dict() + + for iname in self.inputs: + lod = [0] + np_flatten = [] + for seq_id in xrange(len(self.inputs[iname])): + seq_len = len(self.inputs[iname][seq_id]) + lod.append(lod[-1] + seq_len) + np_flatten.extend(self.inputs[iname][seq_id]) + + t = fluid.Tensor() + t.set(numpy.array(np_flatten), place) + t.set_lod([lod]) + feed_dict[iname] = t + + for pname in self.params: + feed_dict[pname] = self.params[pname] + return feed_dict + + def get_numeric_gradient_of_param(self, param_name, delta=0.01): + p = self.params[param_name] + g = numpy.zeros(shape=p.shape, dtype=p.dtype) + + for p_it, g_it in numpy.nditer([p, g], op_flags=['readwrite']): + o = float(p_it) + p_it[...] = o + delta + pos = self._exe_mean_out_() + p_it[...] = o - delta + neg = self._exe_mean_out_() + p_it[...] 
= o + g[:] = (pos - neg) / (delta * 2) + return g + + def _exe_mean_out_(self): + outs = self.exe() + return numpy.array([o.mean() for o in outs.itervalues()]).mean() + + +class SimpleMul(BaseRNN): + def __init__(self): + super(SimpleMul, self).__init__({ + 'X': { + 'shape': [32] + } + }, {}, {'W': { + 'shape': [32, 10] + }}, ['Out']) + + def step(self, X, W, Out): + Out.out(numpy.matmul(X, W)) + + +class TestSimpleMul(unittest.TestCase): + def setUp(self): + self.python_impl = SimpleMul() + + def test_forward(self): + program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(program, startup_program): + dat = fluid.layers.data(name='X', shape=[32], lod_level=1) + + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + d = rnn.step_input(dat) + o = fluid.layers.fc(input=d, + param_attr='W', + bias_attr=False, + size=10, + act=None) + rnn.output(o) + + out = rnn() + out = fluid.layers.sequence_pool(out, pool_type='last') + loss = fluid.layers.mean(x=out) + fluid.backward.append_backward_ops(loss) + + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + out, w_g = exe.run(program, + feed=self.python_impl.to_feed(cpu), + fetch_list=[out, "W@GRAD"]) + out_by_python = self.python_impl.exe()['Out'] + self.assertTrue(numpy.allclose(out, out_by_python)) + w_g_num = self.python_impl.get_numeric_gradient_of_param("W") + print w_g_num[0][0] + print w_g_num - w_g + + +if __name__ == '__main__': + unittest.main() From 4450a312a9228d0237b794d05a75c6de71b3aa55 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 13:13:01 +0800 Subject: [PATCH 02/15] Polish Unittest --- python/paddle/v2/fluid/tests/decorators.py | 27 +++++++ .../fluid/tests/test_dynrnn_gradient_check.py | 80 +++++++++---------- 2 files changed, 67 insertions(+), 40 deletions(-) create mode 100644 python/paddle/v2/fluid/tests/decorators.py diff --git a/python/paddle/v2/fluid/tests/decorators.py b/python/paddle/v2/fluid/tests/decorators.py new file mode 100644 index 0000000000..d3dcf3562d --- /dev/null +++ b/python/paddle/v2/fluid/tests/decorators.py @@ -0,0 +1,27 @@ +import paddle.v2.fluid as fluid + +__all__ = ['many_times', 'prog_scope'] + + +def many_times(times): + def __impl__(fn): + def __fn__(*args, **kwargs): + for _ in range(times): + fn(*args, **kwargs) + + return __fn__ + + return __impl__ + + +def prog_scope(): + def __impl__(fn): + def __fn__(*args, **kwargs): + prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(prog, startup_prog): + fn(*args, **kwargs) + + return __fn__ + + return __impl__ diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index 99b9285466..3018588c3a 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -3,7 +3,7 @@ import random import collections import paddle.v2.fluid as fluid import unittest -import copy +from decorators import * class Memory(object): @@ -78,7 +78,7 @@ class BaseRNN(object): self.outputs[oname] = Output() def step(self, **kwargs): - pass + raise NotImplementedError() def exe(self): retv = dict() @@ -141,18 +141,22 @@ class BaseRNN(object): feed_dict[pname] = self.params[pname] return feed_dict - def get_numeric_gradient_of_param(self, param_name, delta=0.01): + def get_numeric_gradient_of_param(self, param_name, delta=0.001): + if len(p.shape) != 2: + raise ValueError("Not support get numeric gradient of an parameter," + " which is not matrix") p = 
self.params[param_name] g = numpy.zeros(shape=p.shape, dtype=p.dtype) - for p_it, g_it in numpy.nditer([p, g], op_flags=['readwrite']): - o = float(p_it) - p_it[...] = o + delta - pos = self._exe_mean_out_() - p_it[...] = o - delta - neg = self._exe_mean_out_() - p_it[...] = o - g[:] = (pos - neg) / (delta * 2) + for i in xrange(p.shape[0]): + for j in xrange(p.shape[1]): + o = p[i][j] + p[i][j] += delta + pos = self._exe_mean_out_() + p[i][j] -= 2 * delta + neg = self._exe_mean_out_() + p[i][j] = o + g[i][j] = (pos - neg) / (delta * 2) return g def _exe_mean_out_(self): @@ -175,40 +179,36 @@ class SimpleMul(BaseRNN): class TestSimpleMul(unittest.TestCase): - def setUp(self): - self.python_impl = SimpleMul() - - def test_forward(self): - program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(program, startup_program): - dat = fluid.layers.data(name='X', shape=[32], lod_level=1) - - rnn = fluid.layers.DynamicRNN() - with rnn.block(): - d = rnn.step_input(dat) - o = fluid.layers.fc(input=d, - param_attr='W', - bias_attr=False, - size=10, - act=None) - rnn.output(o) - - out = rnn() - out = fluid.layers.sequence_pool(out, pool_type='last') - loss = fluid.layers.mean(x=out) - fluid.backward.append_backward_ops(loss) + # Test many times in local to ensure the random seed cannot breaks CI + # @many_times(10) + @prog_scope() + def test_forward_backward(self): + python_impl = SimpleMul() + dat = fluid.layers.data(name='X', shape=[32], lod_level=1) + + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + d = rnn.step_input(dat) + o = fluid.layers.fc(input=d, + param_attr='W', + bias_attr=False, + size=10, + act=None) + rnn.output(o) + + out = rnn() + out = fluid.layers.sequence_pool(out, pool_type='last') + loss = fluid.layers.mean(x=out) + fluid.backward.append_backward_ops(loss) cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) - out, w_g = exe.run(program, - feed=self.python_impl.to_feed(cpu), + out, w_g = exe.run(feed=python_impl.to_feed(cpu), fetch_list=[out, "W@GRAD"]) - out_by_python = self.python_impl.exe()['Out'] + out_by_python = python_impl.exe()['Out'] self.assertTrue(numpy.allclose(out, out_by_python)) - w_g_num = self.python_impl.get_numeric_gradient_of_param("W") - print w_g_num[0][0] - print w_g_num - w_g + w_g_num = python_impl.get_numeric_gradient_of_param("W") + self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05)) if __name__ == '__main__': From e566b94fba2a3f5c48629841cbace40af8464fa3 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 13:14:27 +0800 Subject: [PATCH 03/15] Revert C++ changes --- paddle/operators/tensor_array_read_write_op.cc | 11 ----------- paddle/operators/while_op.cc | 15 +-------------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 59a4dac940..2ee9bf700c 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -136,17 +136,6 @@ class ReadFromArrayOp : public ArrayOp { auto &dev_ctx = *pool.Borrow(place); framework::CopyFrom(x_array[offset], place, dev_ctx, out_tensor); out_tensor->set_lod(x_array[offset].lod()); - if (Input("X") == "dynamic_rnn_0_output_array_fc_0.tmp_0_0@GRAD") { - VLOG(10) << "Offset = " << offset; - if (x_array[offset].numel() != 0) { - auto d = x_array[offset].dims(); - std::ostringstream sout; - for (int64_t i = 0; i < d[0]; ++i) { - sout << x_array[offset].data()[0 * d[1]] << ", "; - } - VLOG(10) << "Grad = " << 
sout.str(); - } - } } else { VLOG(10) << "offset " << offset << " >= " << x_array.size(); } diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index d7c34297cd..11ee96faad 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -129,9 +129,6 @@ class WhileGradOp : public framework::OperatorBase { auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name), "Cannot find inside gradient %s", inside_og_name); - - VLOG(10) << "OG " << outside_og_name << " Type is " - << og_outside.Type().name(); if (og_outside.Type().hash_code() == typeid(framework::LoDTensor).hash_code()) { auto &outside_tensor = og_outside.Get(); @@ -148,6 +145,7 @@ class WhileGradOp : public framework::OperatorBase { inside_array.resize(outside_array.size()); for (size_t j = 0; j < inside_array.size(); ++j) { + VLOG(10) << j << " " << outside_array[j].numel(); if (outside_array[j].numel() != 0) { inside_array[j].set_lod(outside_array[j].lod()); inside_array[j].ShareDataWith(outside_array[j]); @@ -200,17 +198,6 @@ class WhileGradOp : public framework::OperatorBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); - - VLOG(10) << "Accumulate the gradient of " << pg_names[param_id]; - - if (pg_names[param_id] == "W@GRAD") { - auto &w_g = detail::Ref(cur_scope.FindVar(new_inside_name)) - .Get(); - VLOG(10) << "W_G is" << w_g.data()[0]; - } else { - VLOG(10) << pg_names[param_id]; - } - sum_op->Run(cur_scope, dev_place); cur_scope.Rename(new_inside_name, inside_grad_name); } From 2bfa9796bf4615e0898b33b7b97bb3ca0db013d5 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 13:19:57 +0800 Subject: [PATCH 04/15] Fix check --- python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index 3018588c3a..d0b805882f 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -142,10 +142,10 @@ class BaseRNN(object): return feed_dict def get_numeric_gradient_of_param(self, param_name, delta=0.001): + p = self.params[param_name] if len(p.shape) != 2: raise ValueError("Not support get numeric gradient of an parameter," " which is not matrix") - p = self.params[param_name] g = numpy.zeros(shape=p.shape, dtype=p.dtype) for i in xrange(p.shape[0]): From 32313994ba0091676616435db7b8d3487d4cb41b Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 13:33:42 +0800 Subject: [PATCH 05/15] Add forward test with mem --- .../fluid/tests/test_dynrnn_gradient_check.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index d0b805882f..ef7d5ca9f5 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -211,5 +211,67 @@ class TestSimpleMul(unittest.TestCase): self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05)) +class TestSimpleMulWithMemory(unittest.TestCase): + DATA_WIDTH = 32 + HIDDEN_WIDTH = 10 + DATA_NAME = 'X' + PARAM_NAME = 'W' + + class SimpleMulWithMemory(BaseRNN): + def __init__(self): + super(TestSimpleMulWithMemory.SimpleMulWithMemory, self).__init__({ + 
TestSimpleMulWithMemory.DATA_NAME: { + 'shape': [TestSimpleMulWithMemory.DATA_WIDTH] + } + }, {'Mem': { + 'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH] + }}, { + TestSimpleMulWithMemory.PARAM_NAME: { + 'shape': [ + TestSimpleMulWithMemory.DATA_WIDTH, + TestSimpleMulWithMemory.HIDDEN_WIDTH + ] + } + }, ['Out']) + + def step(self, X, Mem, W, Out): + o = numpy.matmul(X, W) + assert isinstance(Mem, Memory) + o += Mem.ex + Mem.update(o) + assert isinstance(Out, Output) + Out.out(o) + + @prog_scope() + def test_forward_backward(self): + py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() + + data = fluid.layers.data( + name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) + rnn = fluid.layers.DynamicRNN() + with rnn.block(): + d = rnn.step_input(data) + mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH]) + hidden = fluid.layers.fc(input=d, + size=self.HIDDEN_WIDTH, + param_attr=self.PARAM_NAME, + bias_attr=False, + act=None) + o = fluid.layers.elementwise_add(x=hidden, y=mem) + rnn.update_memory(mem, o) + rnn.output(o) + + out = rnn() + last = fluid.layers.sequence_pool(input=out, pool_type='last') + + cpu = fluid.CPUPlace() + exe = fluid.Executor(cpu) + + last_np, = exe.run(feed=py_rnn.to_feed(cpu), fetch_list=[last]) + last_by_py, = py_rnn.exe().values() + + self.assertTrue(numpy.allclose(last_np, last_by_py)) + + if __name__ == '__main__': unittest.main() From 2a36e8ad76e624de8a051bbe1af2b7e7691c3280 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 13:38:32 +0800 Subject: [PATCH 06/15] Make as const name --- .../fluid/tests/test_dynrnn_gradient_check.py | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index ef7d5ca9f5..837666b76e 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -164,35 +164,44 @@ class BaseRNN(object): return numpy.array([o.mean() for o in outs.itervalues()]).mean() -class SimpleMul(BaseRNN): - def __init__(self): - super(SimpleMul, self).__init__({ - 'X': { - 'shape': [32] - } - }, {}, {'W': { - 'shape': [32, 10] - }}, ['Out']) +class TestSimpleMul(unittest.TestCase): + DATA_NAME = 'X' + DATA_WIDTH = 32 + PARAM_NAME = 'W' + HIDDEN_WIDTH = 10 + OUT_NAME = 'Out' - def step(self, X, W, Out): - Out.out(numpy.matmul(X, W)) + class SimpleMul(BaseRNN): + def __init__(self): + base = TestSimpleMul + super(base.SimpleMul, self).__init__({ + base.DATA_NAME: { + 'shape': [base.DATA_WIDTH] + } + }, {}, { + base.PARAM_NAME: { + 'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH] + } + }, [base.OUT_NAME]) + def step(self, X, W, Out): + Out.out(numpy.matmul(X, W)) -class TestSimpleMul(unittest.TestCase): # Test many times in local to ensure the random seed cannot breaks CI # @many_times(10) @prog_scope() def test_forward_backward(self): - python_impl = SimpleMul() - dat = fluid.layers.data(name='X', shape=[32], lod_level=1) + python_impl = TestSimpleMul.SimpleMul() + dat = fluid.layers.data( + name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) rnn = fluid.layers.DynamicRNN() with rnn.block(): d = rnn.step_input(dat) o = fluid.layers.fc(input=d, - param_attr='W', + param_attr=self.PARAM_NAME, bias_attr=False, - size=10, + size=self.HIDDEN_WIDTH, act=None) rnn.output(o) @@ -204,10 +213,10 @@ class TestSimpleMul(unittest.TestCase): cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) out, w_g = 
exe.run(feed=python_impl.to_feed(cpu), - fetch_list=[out, "W@GRAD"]) - out_by_python = python_impl.exe()['Out'] + fetch_list=[out, self.PARAM_NAME + "@GRAD"]) + out_by_python = python_impl.exe()[self.OUT_NAME] self.assertTrue(numpy.allclose(out, out_by_python)) - w_g_num = python_impl.get_numeric_gradient_of_param("W") + w_g_num = python_impl.get_numeric_gradient_of_param(self.PARAM_NAME) self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05)) From 938717ba2b34eb87d25eb451cec5f328c0977148 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 14:37:26 +0800 Subject: [PATCH 07/15] Stash --- paddle/framework/executor.cc | 8 ++ .../fluid/tests/test_dynrnn_gradient_check.py | 79 +++++++++++++++++-- 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 997773c168..a07e8e0b1b 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -66,6 +66,14 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, PADDLE_ENFORCE_LT(static_cast(block_id), pdesc.Size()); auto& block = pdesc.Block(block_id); + if (VLOG_IS_ON(100)) { + std::ostringstream sout; + for (auto& name : scope->GetAllNames(false)) { + sout << name << ", "; + } + VLOG(100) << "Scope has variable " << sout.str(); + } + Scope* local_scope = scope; if (create_vars) { if (create_local_scope) { diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index 837666b76e..22bb2b1cdf 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -159,6 +159,39 @@ class BaseRNN(object): g[i][j] = (pos - neg) / (delta * 2) return g + def get_numeric_gradient_of_input(self, + input_name, + delta=0.001, + return_one_tensor=True): + ipt = self.inputs[input_name] + grad = [] + + for seq in ipt: + seq_grad = [] + for item in seq: + item_grad = numpy.zeros(shape=item.shape, dtype=item.dtype) + if len(item.shape) != 1: + raise ValueError("Not support") + + for i in xrange(len(item)): + o = item[i] + item[i] += delta + pos = self._exe_mean_out_() + item[i] -= 2 * delta + neg = self._exe_mean_out_() + item[i] = o + item_grad[i] = (pos - neg) / (delta * 2) + seq_grad.append(item_grad) + grad.append(seq_grad) + + if not return_one_tensor: + return grad + + for i in xrange(len(grad)): + grad[i] = numpy.concatenate(grad[i]) + grad = numpy.concatenate(grad) + return grad + def _exe_mean_out_(self): outs = self.exe() return numpy.array([o.mean() for o in outs.itervalues()]).mean() @@ -191,9 +224,10 @@ class TestSimpleMul(unittest.TestCase): # @many_times(10) @prog_scope() def test_forward_backward(self): - python_impl = TestSimpleMul.SimpleMul() + py_rnn = TestSimpleMul.SimpleMul() dat = fluid.layers.data( name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) + dat.stop_gradient = False rnn = fluid.layers.DynamicRNN() with rnn.block(): @@ -212,17 +246,26 @@ class TestSimpleMul(unittest.TestCase): cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) - out, w_g = exe.run(feed=python_impl.to_feed(cpu), - fetch_list=[out, self.PARAM_NAME + "@GRAD"]) - out_by_python = python_impl.exe()[self.OUT_NAME] + out, w_g, i_g = map(numpy.array, + exe.run(feed=py_rnn.to_feed(cpu), + fetch_list=[ + out, self.PARAM_NAME + "@GRAD", + self.DATA_NAME + "@GRAD" + ], + return_numpy=False)) + out_by_python = py_rnn.exe()[self.OUT_NAME] self.assertTrue(numpy.allclose(out, out_by_python)) - w_g_num = 
python_impl.get_numeric_gradient_of_param(self.PARAM_NAME) + w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05)) + i_g_num = py_rnn.get_numeric_gradient_of_input( + input_name=self.DATA_NAME) + i_g_num = i_g_num.reshape(i_g.shape) + self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05)) class TestSimpleMulWithMemory(unittest.TestCase): DATA_WIDTH = 32 - HIDDEN_WIDTH = 10 + HIDDEN_WIDTH = 20 DATA_NAME = 'X' PARAM_NAME = 'W' @@ -251,12 +294,14 @@ class TestSimpleMulWithMemory(unittest.TestCase): assert isinstance(Out, Output) Out.out(o) + # @many_times(10) @prog_scope() def test_forward_backward(self): py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() data = fluid.layers.data( name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) + data.stop_gradient = False rnn = fluid.layers.DynamicRNN() with rnn.block(): d = rnn.step_input(data) @@ -272,14 +317,32 @@ class TestSimpleMulWithMemory(unittest.TestCase): out = rnn() last = fluid.layers.sequence_pool(input=out, pool_type='last') + loss = fluid.layers.mean(x=last) + fluid.backward.append_backward_ops(loss) cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) - - last_np, = exe.run(feed=py_rnn.to_feed(cpu), fetch_list=[last]) + feed = py_rnn.to_feed(cpu) + for _ in xrange(2): + last_np, w_g, i_g = map(numpy.array, + exe.run(feed=feed, + fetch_list=[ + last, self.PARAM_NAME + "@GRAD", + self.DATA_NAME + "@GRAD" + ], + return_numpy=False)) last_by_py, = py_rnn.exe().values() self.assertTrue(numpy.allclose(last_np, last_by_py)) + w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) + print w_g[0], w_g_num[0] + self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1)) + i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME) + i_g_num = i_g_num.reshape(i_g.shape) + + # Since this RNN has many float add. The number could be not stable. 
+ # rtol = 0.1 + self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.1)) if __name__ == '__main__': From 82a22d3258b7024e64cd4045c5bbf32aa99f070f Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 17:06:23 +0800 Subject: [PATCH 08/15] Update code --- paddle/framework/executor.cc | 8 ------ paddle/framework/tensor_impl.h | 8 ++++++ paddle/operators/sum_op.h | 2 ++ python/paddle/v2/fluid/executor.py | 25 ++++++++++++++++--- .../tests/book/test_label_semantic_roles.py | 2 +- python/paddle/v2/fluid/tests/decorators.py | 6 +++-- .../fluid/tests/test_dynrnn_gradient_check.py | 20 +++++++-------- 7 files changed, 46 insertions(+), 25 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index a07e8e0b1b..997773c168 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -66,14 +66,6 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, PADDLE_ENFORCE_LT(static_cast(block_id), pdesc.Size()); auto& block = pdesc.Block(block_id); - if (VLOG_IS_ON(100)) { - std::ostringstream sout; - for (auto& name : scope->GetAllNames(false)) { - sout << name << ", "; - } - VLOG(100) << "Scope has variable " << sout.str(); - } - Scope* local_scope = scope; if (create_vars) { if (create_local_scope) { diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 6c6f298edc..46ea3b881d 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -134,6 +134,14 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) { #endif offset_ = 0; } + + if (typeid(float).hash_code() == type.hash_code()) { + auto buf = reinterpret_cast( + reinterpret_cast(holder_->ptr()) + offset_); + for (int64_t i = 0; i < this->numel(); ++i) { + buf[i] = NAN; + } + } return reinterpret_cast(reinterpret_cast(holder_->ptr()) + offset_); } diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index eaa36aa1ae..cbde9976dc 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -107,10 +107,12 @@ class SumKernel : public framework::OpKernel { out_array.resize(i + 1); } if (out_array[i].numel() == 0) { + VLOG(10) << context.op().Output("Out") << " just copy"; framework::CopyFrom(in_array[i], in_array[i].place(), context.device_context(), &out_array[i]); out_array[i].set_lod(in_array[i].lod()); } else { + VLOG(10) << context.op().Output("Out") << " merged"; PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); auto in = EigenVector::Flatten(in_array[i]); auto result = EigenVector::Flatten(out_array[i]); diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py index 2c91afb363..1d6c594b41 100644 --- a/python/paddle/v2/fluid/executor.py +++ b/python/paddle/v2/fluid/executor.py @@ -1,12 +1,31 @@ import numpy as np +import contextlib +from framework import Program, default_main_program from . 
import core -from framework import Program, default_main_program, Parameter, Variable -__all__ = ['Executor', 'g_scope'] +__all__ = ['Executor', 'global_scope', 'scope_guard', 'switch_scope'] g_scope = core.Scope() +def global_scope(): + return g_scope + + +def switch_scope(scope): + global g_scope + ex = g_scope + g_scope = scope + return ex + + +@contextlib.contextmanager +def scope_guard(scope): + ex = switch_scope(scope) + yield + switch_scope(ex) + + def as_numpy(tensor): if isinstance(tensor, list): return [as_numpy(t) for t in tensor] @@ -117,7 +136,7 @@ class Executor(object): raise TypeError() if scope is None: - scope = g_scope + scope = global_scope() program = program.clone() global_block = program.global_block() diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index c3591a613a..8acd470c5e 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -170,7 +170,7 @@ def main(): exe.run(fluid.default_startup_program()) - embedding_param = fluid.g_scope.find_var(embedding_name).get_tensor() + embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor() embedding_param.set( load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place) diff --git a/python/paddle/v2/fluid/tests/decorators.py b/python/paddle/v2/fluid/tests/decorators.py index d3dcf3562d..154619b0e9 100644 --- a/python/paddle/v2/fluid/tests/decorators.py +++ b/python/paddle/v2/fluid/tests/decorators.py @@ -19,8 +19,10 @@ def prog_scope(): def __fn__(*args, **kwargs): prog = fluid.Program() startup_prog = fluid.Program() - with fluid.program_guard(prog, startup_prog): - fn(*args, **kwargs) + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + fn(*args, **kwargs) return __fn__ diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index 22bb2b1cdf..7f61b966fd 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -298,7 +298,6 @@ class TestSimpleMulWithMemory(unittest.TestCase): @prog_scope() def test_forward_backward(self): py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() - data = fluid.layers.data( name=self.DATA_NAME, shape=[self.DATA_WIDTH], lod_level=1) data.stop_gradient = False @@ -323,19 +322,18 @@ class TestSimpleMulWithMemory(unittest.TestCase): cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) feed = py_rnn.to_feed(cpu) - for _ in xrange(2): - last_np, w_g, i_g = map(numpy.array, - exe.run(feed=feed, - fetch_list=[ - last, self.PARAM_NAME + "@GRAD", - self.DATA_NAME + "@GRAD" - ], - return_numpy=False)) + last_np, w_g, i_g = map(numpy.array, + exe.run(feed=feed, + fetch_list=[ + last, self.PARAM_NAME + "@GRAD", + self.DATA_NAME + "@GRAD" + ], + return_numpy=False)) last_by_py, = py_rnn.exe().values() - self.assertTrue(numpy.allclose(last_np, last_by_py)) w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) - print w_g[0], w_g_num[0] + # print w_g_num[0], w_g[0] + self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1)) i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME) i_g_num = i_g_num.reshape(i_g.shape) From 8728885031be996588520373cf3eec8fab0efee3 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Tue, 26 Dec 2017 17:09:57 +0800 Subject: [PATCH 09/15] Revert debug 
code --- paddle/framework/tensor_impl.h | 8 -------- paddle/operators/sum_op.h | 2 -- 2 files changed, 10 deletions(-) diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 46ea3b881d..6c6f298edc 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -134,14 +134,6 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) { #endif offset_ = 0; } - - if (typeid(float).hash_code() == type.hash_code()) { - auto buf = reinterpret_cast( - reinterpret_cast(holder_->ptr()) + offset_); - for (int64_t i = 0; i < this->numel(); ++i) { - buf[i] = NAN; - } - } return reinterpret_cast(reinterpret_cast(holder_->ptr()) + offset_); } diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index cbde9976dc..eaa36aa1ae 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -107,12 +107,10 @@ class SumKernel : public framework::OpKernel { out_array.resize(i + 1); } if (out_array[i].numel() == 0) { - VLOG(10) << context.op().Output("Out") << " just copy"; framework::CopyFrom(in_array[i], in_array[i].place(), context.device_context(), &out_array[i]); out_array[i].set_lod(in_array[i].lod()); } else { - VLOG(10) << context.op().Output("Out") << " merged"; PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); auto in = EigenVector::Flatten(in_array[i]); auto result = EigenVector::Flatten(out_array[i]); From f5c2d175ae105e8938e8343068eff31db5745c19 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Thu, 28 Dec 2017 10:25:18 +0800 Subject: [PATCH 10/15] Refine --- paddle/framework/executor.cc | 9 +++++---- paddle/framework/tensor_impl.h | 13 +++++++++++-- paddle/framework/variable.h | 1 + paddle/operators/fill_constant_op.cc | 1 + paddle/operators/shrink_rnn_memory_op.cc | 5 +++-- paddle/operators/sum_op.h | 4 +--- paddle/operators/tensor_array_read_write_op.cc | 2 +- paddle/operators/while_op.cc | 13 +++++++++++++ 8 files changed, 36 insertions(+), 12 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index 9ee2ddb7c3..fe9a42ace0 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -59,15 +59,16 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { static void CheckTensorNANOrInf(const std::string& name, const framework::Tensor& tensor) { - if (tensor.type().hash_code() != typeid(float).hash_code() && - tensor.type().hash_code() != typeid(double).hash_code()) { + if (tensor.memory_size() == 0) { return; } - if (tensor.memory_size() == 0) { + if (tensor.type().hash_code() != typeid(float).hash_code() && + tensor.type().hash_code() != typeid(double).hash_code()) { return; } PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name); - PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name); + PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN, %p", name, + &tensor); } void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 6c6f298edc..0161ed8c47 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -134,8 +134,17 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) { #endif offset_ = 0; } - return reinterpret_cast(reinterpret_cast(holder_->ptr()) + - offset_); + void* buf = reinterpret_cast( + reinterpret_cast(holder_->ptr()) + offset_); + if (type.hash_code() == typeid(float).hash_code() || + type.hash_code() == 
typeid(double).hash_code()) { + float* tmp = (float*)(buf); + for (int64_t i = 0; i < numel(); ++i) { + tmp[i] = NAN; + } + } + + return buf; } inline void* Tensor::mutable_data(platform::Place place) { diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h index e5a94759f9..3720393601 100644 --- a/paddle/framework/variable.h +++ b/paddle/framework/variable.h @@ -35,6 +35,7 @@ class Variable { template T* GetMutable() { if (!IsType()) { + VLOG(10) << "Resetting " << *this->name_; holder_.reset(new PlaceholderImpl(new T())); } return static_cast(holder_->Ptr()); diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index dcd43a30c8..196c380c73 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -51,6 +51,7 @@ class FillConstantOp : public framework::OperatorBase { platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(dev_place); + VLOG(10) << "FillConstant to " << &out; math::set_constant(dev_ctx, &out, value); } }; diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index e5ef0740b6..9ef473e726 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -116,9 +116,10 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { auto height = dout_tensor.dims()[0]; auto slice = dx_tensor.Slice(0, static_cast(height)); framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice); - if (dx_tensor.dims()[0] < height) { + VLOG(10) << dx_tensor.dims()[0] << ", " << height; + if (dx_tensor.dims()[0] > height) { auto rest_tensor = dx_tensor.Slice( - static_cast(height), static_cast(dout_tensor.dims()[0])); + static_cast(height), static_cast(dx_tensor.dims()[0])); math::set_constant(dev_ctx, &rest_tensor, 0.0f); } } diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index eaa36aa1ae..d1277d3edd 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -38,11 +38,9 @@ class SumKernel : public framework::OpKernel { if (out_var->IsType()) { auto *out = context.Output("Out"); - out->mutable_data(context.GetPlace()); - auto result = EigenVector::Flatten(*out); - if (!in_place) { + out->mutable_data(context.GetPlace()); math::SetConstant constant_functor; constant_functor(context.template device_context(), out, 0.0); diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc index 53e38ec703..d5ff3e3fce 100644 --- a/paddle/operators/tensor_array_read_write_op.cc +++ b/paddle/operators/tensor_array_read_write_op.cc @@ -130,9 +130,9 @@ class ReadFromArrayOp : public ArrayOp { auto &x_array = x->Get(); auto *out = scope.FindVar(Output("Out")); PADDLE_ENFORCE(out != nullptr, "Out must be set"); - auto *out_tensor = out->GetMutable(); size_t offset = GetOffset(scope, place); if (offset < x_array.size()) { + auto *out_tensor = out->GetMutable(); platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 728ef60794..322270c829 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -194,14 +194,27 @@ class WhileGradOp : public framework::OperatorBase { } } + auto check_var_no_nan = [](const framework::Scope &scope, + const std::string &var_name) { + auto *var = scope.FindVar(var_name); + if (var->IsType()) { + VLOG(10) << "Checking " << var_name; + 
PADDLE_ENFORCE(!framework::HasNAN(var->Get()), + "%s has NAN", var_name); + } + }; + check_var_no_nan(cur_scope, inside_grad_name); auto new_inside_name = cur_scope.Rename(inside_grad_name); + check_var_no_nan(cur_scope, new_inside_name); auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); sum_op->Run(cur_scope, dev_place); + check_var_no_nan(cur_scope, pg_names[param_id]); cur_scope.Rename(new_inside_name, inside_grad_name); } } + VLOG(1) << "Complete WhileOpGrad"; } }; From 96bc335216f418a8682e49f75ddaf50eedb71704 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Thu, 28 Dec 2017 12:49:02 +0800 Subject: [PATCH 11/15] Update --- paddle/framework/variable.h | 1 - paddle/operators/sum_op.h | 6 ++++-- python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h index 3720393601..e5a94759f9 100644 --- a/paddle/framework/variable.h +++ b/paddle/framework/variable.h @@ -35,7 +35,6 @@ class Variable { template T* GetMutable() { if (!IsType()) { - VLOG(10) << "Resetting " << *this->name_; holder_.reset(new PlaceholderImpl(new T())); } return static_cast(holder_->Ptr()); diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index d1277d3edd..552b48f608 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -37,10 +37,12 @@ class SumKernel : public framework::OpKernel { bool in_place = out_var == in_vars[0]; if (out_var->IsType()) { - auto *out = context.Output("Out"); - auto result = EigenVector::Flatten(*out); + auto *out = context.Output("Out"); if (!in_place) { out->mutable_data(context.GetPlace()); + } + auto result = EigenVector::Flatten(*out); + if (!in_place) { math::SetConstant constant_functor; constant_functor(context.template device_context(), out, 0.0); diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index 7f61b966fd..238fd1a8cb 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -242,7 +242,7 @@ class TestSimpleMul(unittest.TestCase): out = rnn() out = fluid.layers.sequence_pool(out, pool_type='last') loss = fluid.layers.mean(x=out) - fluid.backward.append_backward_ops(loss) + fluid.backward.append_backward(loss) cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) @@ -317,7 +317,7 @@ class TestSimpleMulWithMemory(unittest.TestCase): out = rnn() last = fluid.layers.sequence_pool(input=out, pool_type='last') loss = fluid.layers.mean(x=last) - fluid.backward.append_backward_ops(loss) + fluid.backward.append_backward(loss) cpu = fluid.CPUPlace() exe = fluid.Executor(cpu) @@ -330,6 +330,7 @@ class TestSimpleMulWithMemory(unittest.TestCase): ], return_numpy=False)) last_by_py, = py_rnn.exe().values() + print w_g[0] self.assertTrue(numpy.allclose(last_np, last_by_py)) w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) # print w_g_num[0], w_g[0] From d2cb28413e5728b819cc93ec68a5c81d62f75007 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Thu, 28 Dec 2017 16:58:39 +0800 Subject: [PATCH 12/15] Fix ALL RNN error --- paddle/operators/while_op.cc | 12 +++++++++++- paddle/pybind/tensor_py.h | 8 ++++---- .../v2/fluid/tests/test_dynrnn_gradient_check.py | 6 ++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/paddle/operators/while_op.cc 
b/paddle/operators/while_op.cc index 322270c829..341c163aa1 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include "paddle/framework/executor.h" #include "paddle/framework/lod_tensor_array.h" @@ -201,6 +202,15 @@ class WhileGradOp : public framework::OperatorBase { VLOG(10) << "Checking " << var_name; PADDLE_ENFORCE(!framework::HasNAN(var->Get()), "%s has NAN", var_name); + if (var->Get().type() == + typeid(float)) { // NOLINT + auto &tensor = var->Get(); + auto *buf = tensor.data(); + for (int64_t i = 0; i < tensor.numel(); ++i) { + PADDLE_ENFORCE(!std::isnan(buf[i])); + } + VLOG(10) << buf[0]; + } } }; check_var_no_nan(cur_scope, inside_grad_name); @@ -210,7 +220,7 @@ class WhileGradOp : public framework::OperatorBase { "sum", {{"X", {pg_names[param_id], new_inside_name}}}, {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); sum_op->Run(cur_scope, dev_place); - check_var_no_nan(cur_scope, pg_names[param_id]); + check_var_no_nan(scope, pg_names[param_id]); cur_scope.Rename(new_inside_name, inside_grad_name); } } diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h index 4d5e73e2c2..6b4290972b 100644 --- a/paddle/pybind/tensor_py.h +++ b/paddle/pybind/tensor_py.h @@ -77,10 +77,10 @@ struct CastToPyBufferImpl { } else if (paddle::platform::is_cpu_place(tensor.place())) { dst_tensor = tensor; } - return py::buffer_info( - dst_tensor.mutable_data(dst_tensor.place()), - sizeof(CUR_TYPE), py::format_descriptor::format(), - (size_t)framework::arity(dst_tensor.dims()), dims_outside, strides); + return py::buffer_info(dst_tensor.data(), sizeof(CUR_TYPE), + py::format_descriptor::format(), + (size_t)framework::arity(dst_tensor.dims()), + dims_outside, strides); } else { constexpr bool less = I + 1 < std::tuple_size>::value; return CastToPyBufferImpl()(tensor); diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index 238fd1a8cb..6569ccb9e6 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -294,7 +294,7 @@ class TestSimpleMulWithMemory(unittest.TestCase): assert isinstance(Out, Output) Out.out(o) - # @many_times(10) + @many_times(10) @prog_scope() def test_forward_backward(self): py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() @@ -330,10 +330,8 @@ class TestSimpleMulWithMemory(unittest.TestCase): ], return_numpy=False)) last_by_py, = py_rnn.exe().values() - print w_g[0] - self.assertTrue(numpy.allclose(last_np, last_by_py)) w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME) - # print w_g_num[0], w_g[0] + self.assertTrue(numpy.allclose(last_np, last_by_py)) self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1)) i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME) From 0fd4a04abdc6f411ebb77d7a389108e951223c7e Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Fri, 29 Dec 2017 13:10:53 +0800 Subject: [PATCH 13/15] Remove debug codes --- paddle/framework/tensor_impl.h | 13 ++----------- paddle/operators/fill_constant_op.cc | 1 - paddle/operators/shrink_rnn_memory_op.cc | 1 - paddle/operators/while_op.cc | 23 ----------------------- 4 files changed, 2 insertions(+), 36 deletions(-) diff --git a/paddle/framework/tensor_impl.h 
b/paddle/framework/tensor_impl.h index 0161ed8c47..6c6f298edc 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -134,17 +134,8 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) { #endif offset_ = 0; } - void* buf = reinterpret_cast( - reinterpret_cast(holder_->ptr()) + offset_); - if (type.hash_code() == typeid(float).hash_code() || - type.hash_code() == typeid(double).hash_code()) { - float* tmp = (float*)(buf); - for (int64_t i = 0; i < numel(); ++i) { - tmp[i] = NAN; - } - } - - return buf; + return reinterpret_cast(reinterpret_cast(holder_->ptr()) + + offset_); } inline void* Tensor::mutable_data(platform::Place place) { diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc index 196c380c73..dcd43a30c8 100644 --- a/paddle/operators/fill_constant_op.cc +++ b/paddle/operators/fill_constant_op.cc @@ -51,7 +51,6 @@ class FillConstantOp : public framework::OperatorBase { platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(dev_place); - VLOG(10) << "FillConstant to " << &out; math::set_constant(dev_ctx, &out, value); } }; diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc index 9ef473e726..b37269b471 100644 --- a/paddle/operators/shrink_rnn_memory_op.cc +++ b/paddle/operators/shrink_rnn_memory_op.cc @@ -116,7 +116,6 @@ class ShrinkRNNMemoryGradOp : public ArrayOp { auto height = dout_tensor.dims()[0]; auto slice = dx_tensor.Slice(0, static_cast(height)); framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice); - VLOG(10) << dx_tensor.dims()[0] << ", " << height; if (dx_tensor.dims()[0] > height) { auto rest_tensor = dx_tensor.Slice( static_cast(height), static_cast(dx_tensor.dims()[0])); diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc index 341c163aa1..728ef60794 100644 --- a/paddle/operators/while_op.cc +++ b/paddle/operators/while_op.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include #include #include "paddle/framework/executor.h" #include "paddle/framework/lod_tensor_array.h" @@ -195,36 +194,14 @@ class WhileGradOp : public framework::OperatorBase { } } - auto check_var_no_nan = [](const framework::Scope &scope, - const std::string &var_name) { - auto *var = scope.FindVar(var_name); - if (var->IsType()) { - VLOG(10) << "Checking " << var_name; - PADDLE_ENFORCE(!framework::HasNAN(var->Get()), - "%s has NAN", var_name); - if (var->Get().type() == - typeid(float)) { // NOLINT - auto &tensor = var->Get(); - auto *buf = tensor.data(); - for (int64_t i = 0; i < tensor.numel(); ++i) { - PADDLE_ENFORCE(!std::isnan(buf[i])); - } - VLOG(10) << buf[0]; - } - } - }; - check_var_no_nan(cur_scope, inside_grad_name); auto new_inside_name = cur_scope.Rename(inside_grad_name); - check_var_no_nan(cur_scope, new_inside_name); auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); sum_op->Run(cur_scope, dev_place); - check_var_no_nan(scope, pg_names[param_id]); cur_scope.Rename(new_inside_name, inside_grad_name); } } - VLOG(1) << "Complete WhileOpGrad"; } }; From fcd84c15303cac9573432a6ce4516c2d643064e8 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Fri, 29 Dec 2017 13:14:31 +0800 Subject: [PATCH 14/15] Comment debug code --- python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py index 6569ccb9e6..c02c59284e 100644 --- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py +++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py @@ -294,7 +294,8 @@ class TestSimpleMulWithMemory(unittest.TestCase): assert isinstance(Out, Output) Out.out(o) - @many_times(10) + # many_times used locally for debug. Make sure the calculation is stable. + # @many_times(10) @prog_scope() def test_forward_backward(self): py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory() From d25f382d0b8c095008e1f5694e7aaf6f7fa7c075 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Fri, 29 Dec 2017 14:52:40 +0800 Subject: [PATCH 15/15] Remove debug codes --- paddle/framework/executor.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index d465f88888..bf1f0471cc 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -67,8 +67,7 @@ static void CheckTensorNANOrInf(const std::string& name, return; } PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name); - PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN, %p", name, - &tensor); + PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name); } void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
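
For reference, the numeric check that test_dynrnn_gradient_check.py relies on throughout these patches is a plain central-difference estimate of d(mean output)/d(parameter), computed entry by entry and compared against the analytic W@GRAD / X@GRAD fetched from the executor via numpy.allclose (rtol=0.05 for the simple mul case, rtol=0.1 once the RNN accumulates many float additions). The sketch below is a minimal standalone illustration of that idea, not part of the patches; the helper name numeric_gradient and the loss_fn callback are assumptions standing in for BaseRNN.get_numeric_gradient_of_param and BaseRNN._exe_mean_out_.

import numpy

def numeric_gradient(param, loss_fn, delta=0.001):
    # Central-difference gradient of a scalar loss w.r.t. a 2-D parameter:
    # nudge each entry by +/- delta, re-run the forward pass, and take the
    # slope as the gradient estimate, restoring the entry afterwards.
    grad = numpy.zeros(shape=param.shape, dtype=param.dtype)
    for i in range(param.shape[0]):
        for j in range(param.shape[1]):
            origin = param[i][j]
            param[i][j] = origin + delta
            pos = loss_fn()          # scalar loss with the entry nudged up
            param[i][j] = origin - delta
            neg = loss_fn()          # scalar loss with the entry nudged down
            param[i][j] = origin     # restore the original value
            grad[i][j] = (pos - neg) / (delta * 2)
    return grad

The central difference is used rather than a one-sided difference because its truncation error is O(delta^2) instead of O(delta), which is why a small delta (0.001) combined with a relative tolerance of a few percent is sufficient to catch a wrong analytic gradient.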