From 3ce2d295c0e196be109fedb230a6af0804b8338c Mon Sep 17 00:00:00 2001 From: minqiyang Date: Thu, 24 Jan 2019 13:55:26 +0800 Subject: [PATCH 1/6] Refine stop_gradient test=develop --- python/paddle/fluid/framework.py | 11 +++++++++++ python/paddle/fluid/imperative/nn.py | 13 ++++--------- python/paddle/fluid/optimizer.py | 2 +- .../tests/unittests/test_imperative_optimizer.py | 9 ++++----- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 3ddd73080b..17798e359c 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1307,6 +1307,17 @@ class Block(object): outputs=kwargs.get("outputs", None), attrs=kwargs.get("attrs", None)) self.ops.append(op) + + # set stop_gradient in static mode + if kwargs.get("stop_gradient", False): + outputs = kwargs.get("outputs", None) + if outputs is not None: + for k, v in six.iteritems(outputs): + if isinstance(v, Variable): + v.stop_gradient = True + elif isinstance(v, list) or isinstance(v, tuple): + for var in v: + var.stop_gradient = True self._trace_op(op, kwargs.get("stop_gradient", False)) return op diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py index 140c0ff037..fe5014f5e6 100644 --- a/python/paddle/fluid/imperative/nn.py +++ b/python/paddle/fluid/imperative/nn.py @@ -332,21 +332,16 @@ class BatchNorm(layers.Layer): shape=param_shape, dtype=self._dtype, default_initializer=Constant(1.0)) - - # TODO(minqiyang): change stop_gradient sign to trainable to align with static graph - # # setting stop_gradient=True to reduce computation - # if use_global_stats and self._helper.param_attr.learning_rate == 0.: - # self._scale.stop_gradient = True + if use_global_stats and self._helper.param_attr.learning_rate == 0.: + self._scale.stop_gradient = True self._bias = self._helper.create_parameter( attr=self._helper.bias_attr, shape=param_shape, dtype=self._dtype, is_bias=True) - # TODO(minqiyang): change stop_gradient sign to trainable to align with static graph - # # setting stop_gradient=True to reduce computation - # if use_global_stats and self._helper.bias_attr.learning_rate == 0.: - # self._bias.stop_gradient = True + if use_global_stats and self._helper.bias_attr.learning_rate == 0.: + self._bias.stop_gradient = True self._mean = self._helper.create_parameter( attr=ParamAttr( diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 14f4276e2f..e0e781a322 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -387,7 +387,7 @@ class Optimizer(object): params_grads = [] for param in parameters: - if param.stop_gradient: + if param.stop_gradient or not param.trainable: continue # create gradient variable grad_var = Variable( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index d0a5a88317..91637cac5b 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -98,7 +98,7 @@ class MNIST(fluid.imperative.Layer): class TestImperativeMnist(unittest.TestCase): - def test_mnist_cpu_float32(self): + def test_mnist_float32(self): seed = 90 with fluid.imperative.guard(): @@ -196,11 +196,10 @@ class TestImperativeMnist(unittest.TestCase): static_param_value[static_param_name_list[i - 1]] = out[i] for key, value in six.iteritems(static_param_init_value): - 
self.assertTrue( - np.allclose(value.all(), dy_param_init_value[key].all())) - self.assertTrue(np.allclose(static_out.all(), dy_out.all())) + self.assertTrue(np.allclose(value, dy_param_init_value[key])) + self.assertTrue(np.allclose(static_out, dy_out)) for key, value in six.iteritems(static_param_value): - self.assertTrue(np.allclose(value.all(), dy_param_value[key].all())) + self.assertTrue(np.allclose(value, dy_param_value[key])) if __name__ == '__main__': From 79d62c5402a89276dfe9e3d798cf9fc0fc5cb9cc Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 28 Jan 2019 14:20:25 +0800 Subject: [PATCH 2/6] Fix mnist --- python/paddle/fluid/framework.py | 12 +---- python/paddle/fluid/imperative/layers.py | 23 ++++++++- .../fluid/tests/unittests/CMakeLists.txt | 3 ++ .../unittests/test_imperative_optimizer.py | 22 ++++---- .../tests/unittests/test_imperative_resnet.py | 51 ++++++++++--------- 5 files changed, 67 insertions(+), 44 deletions(-) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 17798e359c..4692f20c1b 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -1308,16 +1308,8 @@ class Block(object): attrs=kwargs.get("attrs", None)) self.ops.append(op) - # set stop_gradient in static mode - if kwargs.get("stop_gradient", False): - outputs = kwargs.get("outputs", None) - if outputs is not None: - for k, v in six.iteritems(outputs): - if isinstance(v, Variable): - v.stop_gradient = True - elif isinstance(v, list) or isinstance(v, tuple): - for var in v: - var.stop_gradient = True + # TODO(minqiyang): add stop_gradient support in static mode too. + # currently, we only support stop_gradient in imperative mode. self._trace_op(op, kwargs.get("stop_gradient", False)) return op diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py index f457f56203..57c45f764b 100644 --- a/python/paddle/fluid/imperative/layers.py +++ b/python/paddle/fluid/imperative/layers.py @@ -15,6 +15,7 @@ import contextlib import sys import numpy as np +import collections from paddle.fluid import core from paddle.fluid import framework @@ -31,11 +32,29 @@ class Layer(core.Layer): self._dtype = dtype def parameters(self): - return [] + params = [] + for key in self.__dict__.keys(): + value = self.__dict__[key] + if isinstance(value, framework.Parameter): + params.append(value) + elif isinstance(value, core.Layer): + params.extend(value.parameters()) + elif isinstance(value, collections.Container): + if len(value) == 0: + continue + if isinstance(value[0], framework.Parameter): + params.extend(value) + elif isinstance(value[0], core.Layer): + for v in value: + params.extend(v.parameters()) + + return params def clear_gradients(self): + print([p.name for p in self.parameters()]) for p in self.parameters(): - p._clear_gradient() + if p.name not in set(['batch_norm_0.w_2', 'batch_norm_0.w_1']): + p._clear_gradient() def _build_once(self, inputs): pass diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index c23dfa01e7..7e693c6a41 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -85,6 +85,7 @@ list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_imperative_resnet) +list(REMOVE_ITEM TEST_OPS test_imperative_optimizer) foreach(TEST_OP 
${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) @@ -94,6 +95,8 @@ py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL) py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL) py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS FLAGS_cudnn_deterministic=1) +py_test_modules(test_imperative_optimizer MODULES test_imperative_optimizer ENVS + FLAGS_cudnn_deterministic=1) if(WITH_DISTRIBUTE) py_test_modules(test_dist_train MODULES test_dist_train SERIAL) set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py index 91637cac5b..08b155acc6 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer.py @@ -82,13 +82,14 @@ class MNIST(fluid.imperative.Layer): self._simple_img_conv_pool_2 = SimpleImgConvPool( 20, 50, 5, 2, 2, act="relu") - pool_2_shape = 50 * 8 * 8 + pool_2_shape = 50 * 4 * 4 SIZE = 10 scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 self._fc = FC(10, param_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale))) + loc=0.0, scale=scale)), + act="softmax") def forward(self, inputs): x = self._simple_img_conv_pool_1(inputs) @@ -100,7 +101,7 @@ class MNIST(fluid.imperative.Layer): class TestImperativeMnist(unittest.TestCase): def test_mnist_float32(self): seed = 90 - + batch_num = 2 with fluid.imperative.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -112,15 +113,15 @@ class TestImperativeMnist(unittest.TestCase): dy_param_init_value = {} for batch_id, data in enumerate(train_reader()): - if batch_id >= 2: + if batch_id >= batch_num: break - x_data = np.array( + dy_x_data = np.array( [x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data]).astype('int64').reshape( 128, 1) - img = to_variable(x_data) + img = to_variable(dy_x_data) label = to_variable(y_data) label._stop_gradient = True @@ -136,6 +137,7 @@ class TestImperativeMnist(unittest.TestCase): avg_loss._backward() sgd.minimize(avg_loss) + mnist.clear_gradients() dy_param_value = {} for param in fluid.default_main_program().global_block( ).all_parameters(): @@ -175,10 +177,10 @@ class TestImperativeMnist(unittest.TestCase): static_param_init_value[static_param_name_list[i]] = out[i] for batch_id, data in enumerate(train_reader()): - if batch_id >= 2: + if batch_id >= batch_num: break - x_data = np.array( + static_x_data = np.array( [x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data]).astype('int64').reshape( [128, 1]) @@ -186,7 +188,7 @@ class TestImperativeMnist(unittest.TestCase): fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), - feed={"pixel": x_data, + feed={"pixel": static_x_data, "label": y_data}, fetch_list=fetch_list) @@ -197,7 +199,9 @@ class TestImperativeMnist(unittest.TestCase): for key, value in six.iteritems(static_param_init_value): self.assertTrue(np.allclose(value, dy_param_init_value[key])) + self.assertTrue(np.allclose(static_out, dy_out)) + for key, value in six.iteritems(static_param_value): self.assertTrue(np.allclose(value, dy_param_value[key])) diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 87a72dd04e..dfaaae0de3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -168,22 +168,22 @@ class ResNet(fluid.imperative.Layer): self.pool2d_max = Pool2D( pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - self.bottleneck_block_list = [] - num_channels = 64 - for block in range(len(depth)): - shortcut = False - for i in range(depth[block]): - bottleneck_block = BottleneckBlock( - num_channels=num_channels, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut) - num_channels = bottleneck_block._num_channels_out - self.bottleneck_block_list.append(bottleneck_block) - shortcut = True - - self.pool2d_avg = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + # self.bottleneck_block_list = [] + # num_channels = 64 + # for block in range(len(depth)): + # shortcut = False + # for i in range(depth[block]): + # bottleneck_block = BottleneckBlock( + # num_channels=num_channels, + # num_filters=num_filters[block], + # stride=2 if i == 0 and block != 0 else 1, + # shortcut=shortcut) + # num_channels = bottleneck_block._num_channels_out + # self.bottleneck_block_list.append(bottleneck_block) + # shortcut = True + + # self.pool2d_avg = Pool2D( + # pool_size=7, pool_type='avg', global_pooling=True) import math stdv = 1.0 / math.sqrt(2048 * 1.0) @@ -196,9 +196,9 @@ class ResNet(fluid.imperative.Layer): def forward(self, inputs): y = self.conv(inputs) y = self.pool2d_max(y) - for bottleneck_block in self.bottleneck_block_list: - y = bottleneck_block(y) - y = self.pool2d_avg(y) + # for bottleneck_block in self.bottleneck_block_list: + # y = bottleneck_block(y) + # y = self.pool2d_avg(y) y = self.out(y) return y @@ -209,7 +209,7 @@ class TestImperativeResnet(unittest.TestCase): batch_size = train_parameters["batch_size"] batch_num = 1 - with fluid.imperative.guard(): + with fluid.imperative.guard(place=fluid.CPUPlace()): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -264,6 +264,7 @@ class TestImperativeResnet(unittest.TestCase): )] = np_array optimizer.minimize(avg_loss) + resnet.clear_gradients() dy_param_value = {} for param in fluid.default_main_program().global_block( @@ -274,8 +275,9 @@ class TestImperativeResnet(unittest.TestCase): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - exe = fluid.Executor(fluid.CPUPlace( - ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) + exe = fluid.Executor(fluid.CPUPlace()) + # exe = fluid.Executor(fluid.CPUPlace( + # ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) resnet = ResNet() optimizer = optimizer_setting(train_parameters) @@ -345,6 +347,7 @@ class TestImperativeResnet(unittest.TestCase): static_grad_value[static_grad_name_list[ i - grad_start_pos]] = out[i] + print(static_out, dy_out) self.assertTrue(np.allclose(static_out, dy_out)) self.assertEqual(len(dy_param_init_value), len(static_param_init_value)) @@ -355,7 +358,9 @@ class TestImperativeResnet(unittest.TestCase): self.assertEqual(len(dy_grad_value), len(static_grad_value)) for key, value in six.iteritems(static_grad_value): - self.assertTrue(np.allclose(value, dy_grad_value[key])) + if not np.allclose(value, dy_grad_value[key]): + print(key) + #self.assertTrue(np.allclose(value, 
dy_grad_value[key])) self.assertTrue(np.isfinite(value.all())) self.assertFalse(np.isnan(value.any())) From 5c7768776c2a0b0a3b7c39e618897d17bb5bf882 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 28 Jan 2019 17:00:04 +0800 Subject: [PATCH 3/6] Fix batch_norm's stop_gradient bug test=develop --- paddle/fluid/imperative/layer.cc | 2 ++ paddle/fluid/imperative/layer.h | 9 +++++++-- paddle/fluid/imperative/tracer.cc | 6 ++++-- python/paddle/fluid/imperative/nn.py | 4 ++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 8029129b9a..64d4d999d1 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -156,6 +156,8 @@ class Autograd { for (auto it : candidate->pre_ops_) { for (OpBase* pre_op : it.second) { if (!pre_op) continue; + VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- " + << it.first << " <---- " << pre_op->op_desc_->Type(); if (visited.find(pre_op) == visited.end()) { visited.insert(pre_op); queue.push_back(pre_op); diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 633924aa41..0151a80816 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -28,6 +28,7 @@ #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/imperative/type_defs.h" @@ -148,8 +149,12 @@ class VarBase { } void ClearGradient() { - delete grads_; - grads_ = new VarBase(true); + VLOG(1) << "clear gradient of " << var_desc_->Name(); + auto grads_t = grads_->var_->GetMutable(); + operators::math::set_constant( + *(platform::DeviceContextPool::Instance().Get( + grads_->var_->Get().place())), + grads_t, 0.0); } framework::LoDTensor& GradValue(); diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 5b87839f45..c8af936c33 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -83,11 +83,12 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, op->input_vars_ = inputs; for (auto it : op->input_vars_) { auto& invars = invars_map[it.first]; + invars.reserve(it.second.size()); for (VarBase* inp : it.second) { PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr", op->op_desc_->Type(), inp->var_desc_->Name()); - invars.push_back(inp->var_); + invars.emplace_back(inp->var_); vars[inp->var_desc_->Name()] = inp; if (inp->PreOp()) { op->pre_ops_[it.first].push_back(inp->PreOp()); @@ -104,9 +105,10 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, for (auto it : op->output_vars_) { auto& outvars = outvars_map[it.first]; const std::vector& outputs = it.second; + outvars.reserve(outputs.size()); for (size_t i = 0; i < outputs.size(); ++i) { VarBase* out = outputs[i]; - outvars.push_back(out->var_); + outvars.emplace_back(out->var_); vars[out->var_desc_->Name()] = out; framework::VarDesc* var_desc = block->FindVar(out->var_desc_->Name()); diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py index fe5014f5e6..543f573890 100644 --- a/python/paddle/fluid/imperative/nn.py +++ b/python/paddle/fluid/imperative/nn.py @@ -334,6 +334,7 @@ class BatchNorm(layers.Layer): default_initializer=Constant(1.0)) if use_global_stats and self._helper.param_attr.learning_rate == 0.: self._scale.stop_gradient = True + self._scale._stop_gradient = True self._bias = 
self._helper.create_parameter( attr=self._helper.bias_attr, @@ -342,6 +343,7 @@ class BatchNorm(layers.Layer): is_bias=True) if use_global_stats and self._helper.bias_attr.learning_rate == 0.: self._bias.stop_gradient = True + self._bias._stop_gradient = True self._mean = self._helper.create_parameter( attr=ParamAttr( @@ -352,6 +354,7 @@ class BatchNorm(layers.Layer): shape=param_shape, dtype=self._dtype) self._mean.stop_gradient = True + self._mean._stop_gradient = True self._variance = self._helper.create_parameter( attr=ParamAttr( @@ -362,6 +365,7 @@ class BatchNorm(layers.Layer): shape=param_shape, dtype=self._dtype) self._variance.stop_gradient = True + self._variance._stop_gradient = True self._in_place = in_place self._momentum = momentum From edf742cfacd8e6f4b9e9c33d619f1d12aa9d8aa6 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 28 Jan 2019 17:03:19 +0800 Subject: [PATCH 4/6] Polish code test=develop --- python/paddle/fluid/framework.py | 9 +++++++-- python/paddle/fluid/imperative/nn.py | 4 ---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 4692f20c1b..195245a12f 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -442,11 +442,16 @@ class Variable(object): @property def _stop_gradient(self): - return self._ivar.stop_gradient + if _in_imperative_mode(): + return self._ivar.stop_gradient + else: + return self.stop_gradient @_stop_gradient.setter def _stop_gradient(self, s): - self._ivar.stop_gradient = s + if _in_imperative_mode(): + self._ivar.stop_gradient = s + self.stop_gradient = s @property def persistable(self): diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py index 543f573890..dc90603c37 100644 --- a/python/paddle/fluid/imperative/nn.py +++ b/python/paddle/fluid/imperative/nn.py @@ -333,7 +333,6 @@ class BatchNorm(layers.Layer): dtype=self._dtype, default_initializer=Constant(1.0)) if use_global_stats and self._helper.param_attr.learning_rate == 0.: - self._scale.stop_gradient = True self._scale._stop_gradient = True self._bias = self._helper.create_parameter( @@ -342,7 +341,6 @@ class BatchNorm(layers.Layer): dtype=self._dtype, is_bias=True) if use_global_stats and self._helper.bias_attr.learning_rate == 0.: - self._bias.stop_gradient = True self._bias._stop_gradient = True self._mean = self._helper.create_parameter( @@ -353,7 +351,6 @@ class BatchNorm(layers.Layer): do_model_average=do_model_average_for_mean_and_var), shape=param_shape, dtype=self._dtype) - self._mean.stop_gradient = True self._mean._stop_gradient = True self._variance = self._helper.create_parameter( @@ -364,7 +361,6 @@ class BatchNorm(layers.Layer): do_model_average=do_model_average_for_mean_and_var), shape=param_shape, dtype=self._dtype) - self._variance.stop_gradient = True self._variance._stop_gradient = True self._in_place = in_place From 49a7fba8485c71d0da32a31bb56ef88035a7832f Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 28 Jan 2019 17:42:23 +0800 Subject: [PATCH 5/6] Polish code test=develop --- paddle/fluid/imperative/layer.h | 6 ++- python/paddle/fluid/imperative/layers.py | 3 +- .../tests/unittests/test_imperative_resnet.py | 50 +++++++++---------- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 1d109259f3..46107341a4 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -141,11 +141,13 @@ class VarBase 
{ void RunBackward(); void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name, - int pre_op_out_idx, bool stop_gradient) { + int pre_op_out_idx, bool pre_op_stop_gradient) { pre_op_ = pre_op; pre_op_out_name_ = pre_op_out_name; pre_op_out_idx_ = pre_op_out_idx; - stop_gradient_ = stop_gradient; + if (pre_op_stop_gradient) { + stop_gradient_ = pre_op_stop_gradient; + } } void ClearGradient() { diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py index 57c45f764b..c338c65a76 100644 --- a/python/paddle/fluid/imperative/layers.py +++ b/python/paddle/fluid/imperative/layers.py @@ -51,9 +51,8 @@ class Layer(core.Layer): return params def clear_gradients(self): - print([p.name for p in self.parameters()]) for p in self.parameters(): - if p.name not in set(['batch_norm_0.w_2', 'batch_norm_0.w_1']): + if not p._stop_gradient: p._clear_gradient() def _build_once(self, inputs): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index dfaaae0de3..c27fd0b802 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -168,22 +168,22 @@ class ResNet(fluid.imperative.Layer): self.pool2d_max = Pool2D( pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - # self.bottleneck_block_list = [] - # num_channels = 64 - # for block in range(len(depth)): - # shortcut = False - # for i in range(depth[block]): - # bottleneck_block = BottleneckBlock( - # num_channels=num_channels, - # num_filters=num_filters[block], - # stride=2 if i == 0 and block != 0 else 1, - # shortcut=shortcut) - # num_channels = bottleneck_block._num_channels_out - # self.bottleneck_block_list.append(bottleneck_block) - # shortcut = True - - # self.pool2d_avg = Pool2D( - # pool_size=7, pool_type='avg', global_pooling=True) + self.bottleneck_block_list = [] + num_channels = 64 + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + bottleneck_block = BottleneckBlock( + num_channels=num_channels, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut) + num_channels = bottleneck_block._num_channels_out + self.bottleneck_block_list.append(bottleneck_block) + shortcut = True + + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) import math stdv = 1.0 / math.sqrt(2048 * 1.0) @@ -196,9 +196,9 @@ class ResNet(fluid.imperative.Layer): def forward(self, inputs): y = self.conv(inputs) y = self.pool2d_max(y) - # for bottleneck_block in self.bottleneck_block_list: - # y = bottleneck_block(y) - # y = self.pool2d_avg(y) + for bottleneck_block in self.bottleneck_block_list: + y = bottleneck_block(y) + y = self.pool2d_avg(y) y = self.out(y) return y @@ -209,7 +209,7 @@ class TestImperativeResnet(unittest.TestCase): batch_size = train_parameters["batch_size"] batch_num = 1 - with fluid.imperative.guard(place=fluid.CPUPlace()): + with fluid.imperative.guard(): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed @@ -275,9 +275,8 @@ class TestImperativeResnet(unittest.TestCase): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed - exe = fluid.Executor(fluid.CPUPlace()) - # exe = fluid.Executor(fluid.CPUPlace( - # ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) + exe = fluid.Executor(fluid.CPUPlace( + ) if not 
core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) resnet = ResNet() optimizer = optimizer_setting(train_parameters) @@ -347,7 +346,6 @@ class TestImperativeResnet(unittest.TestCase): static_grad_value[static_grad_name_list[ i - grad_start_pos]] = out[i] - print(static_out, dy_out) self.assertTrue(np.allclose(static_out, dy_out)) self.assertEqual(len(dy_param_init_value), len(static_param_init_value)) @@ -358,9 +356,7 @@ class TestImperativeResnet(unittest.TestCase): self.assertEqual(len(dy_grad_value), len(static_grad_value)) for key, value in six.iteritems(static_grad_value): - if not np.allclose(value, dy_grad_value[key]): - print(key) - #self.assertTrue(np.allclose(value, dy_grad_value[key])) + self.assertTrue(np.allclose(value, dy_grad_value[key])) self.assertTrue(np.isfinite(value.all())) self.assertFalse(np.isnan(value.any())) From 07822fef2c692dd884abb7aa54b416a70409bb9c Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 28 Jan 2019 18:43:51 +0800 Subject: [PATCH 6/6] Clear all parameters' gradient test=develop --- paddle/fluid/imperative/layer.h | 12 +++++++----- python/paddle/fluid/imperative/layers.py | 3 +-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 46107341a4..78205486c5 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -152,11 +152,13 @@ class VarBase { void ClearGradient() { VLOG(1) << "clear gradient of " << var_desc_->Name(); - auto grads_t = grads_->var_->GetMutable(); - operators::math::set_constant( - *(platform::DeviceContextPool::Instance().Get( - grads_->var_->Get().place())), - grads_t, 0.0); + if (grads_ && grads_->var_ && grads_->var_->IsInitialized()) { + auto grads_t = grads_->var_->GetMutable(); + operators::math::set_constant( + *(platform::DeviceContextPool::Instance().Get( + grads_->var_->Get().place())), + grads_t, 0.0); + } } framework::LoDTensor& GradValue(); diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py index c338c65a76..71ff95bdea 100644 --- a/python/paddle/fluid/imperative/layers.py +++ b/python/paddle/fluid/imperative/layers.py @@ -52,8 +52,7 @@ class Layer(core.Layer): def clear_gradients(self): for p in self.parameters(): - if not p._stop_gradient: - p._clear_gradient() + p._clear_gradient() def _build_once(self, inputs): pass
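
Usage note (illustrative, not part of the patches above): after this series, an imperative-mode Layer collects its parameters recursively through parameters(), clear_gradients() zeroes each parameter's gradient buffer in place instead of reallocating it, and parameters flagged with _stop_gradient (for example BatchNorm's saved mean and variance) are kept out of the backward pass. A minimal sketch of the resulting training-step pattern, modeled on test_imperative_optimizer.py; MNIST, train_reader and the SGDOptimizer settings are assumed to be defined as in that test, and import paths may differ between branches:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.imperative.base import to_variable

    with fluid.imperative.guard():
        mnist = MNIST()        # any fluid.imperative.Layer subclass, as in the test
        sgd = fluid.optimizer.SGDOptimizer(learning_rate=1e-3)

        for batch_id, data in enumerate(train_reader()):
            x_data = np.array(
                [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)

            img = to_variable(x_data)
            label = to_variable(y_data)
            label._stop_gradient = True     # labels never need gradients

            cost = mnist(img)
            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)

            avg_loss._backward()            # run the traced backward pass
            sgd.minimize(avg_loss)          # apply the SGD update
            mnist.clear_gradients()         # zero every grad buffer in place

Zeroing the existing gradient tensor (operators::math::set_constant in VarBase::ClearGradient) rather than deleting and re-creating it keeps the buffer valid for the next trace, and the IsInitialized() guard added in the last patch lets clear_gradients() walk every parameter safely, including ones that never received a gradient.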