remove params in Tracer object (in dygraph) (#20815)

* remove params in Tracer object, test=develop

* Repair failed optest, test=develop

* remove build_once & name_scope (Conv2D)
test=develop

* fix unittest
test=develop

* Conv2DTranspose

* Conv3D & Conv3DTranspose
test=develop

* Pool2D & BatchNorm

* Embedding

* LayerNorm

* GRUUnit & NCE

* PRelu

* BilinearTensorProduct

* GroupNorm & SpectralNorm

* TreeConv
test=develop

* fix LayerNorm in transformer unittest
test=develop

* disable LayerNorm or BatchNorm in multicard
test=develop

* refine Layer.create_parameter api
test=develop

* refine LayerNorm, remove begin_norm_axis param, add normed shape check
test=develop

* LayerNorm bug fix
test=develop

* fix optest, test=develop

* fix optest, test=develop

* fix optest for pass parameter_list when constructing an Optimizer class instance, test=develop

* polish code for better code style, test=develop

* fix se_resnext optest, test=develop

* polish code for better code style, test=develop

Co-authored-by: songyouwei <youwei0314@gmail.com>
release/1.7
zhongpu 6 years ago committed by hong
parent c3e1954918
commit dca075839b
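Taken together, the diffs below change the dygraph workflow in two visible ways: a Layer is constructed without a name_scope argument (and FC gives way to Linear, which takes an explicit input dimension), and an optimizer must be told which parameters it updates, since the Tracer no longer records every parameter it sees. A minimal sketch of the new idiom (the sizes and random input are illustrative, not from the patch):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Linear, to_variable

    with fluid.dygraph.guard():
        model = Linear(4, 2)  # Linear replaces FC; input dim is explicit
        # parameter_list is now required in dygraph mode
        sgd = fluid.optimizer.SGD(
            learning_rate=1e-3, parameter_list=model.parameters())
        x = to_variable(np.random.rand(8, 4).astype('float32'))
        loss = fluid.layers.reduce_mean(model(x))
        loss.backward()
        sgd.minimize(loss)
        model.clear_gradients()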

@@ -154,14 +154,14 @@ def guard(place=None):
         yield


-def _print_debug_msg(limit=5, is_test=False):
+def _print_debug_msg(parameter_list, limit=5, is_test=False):
     if not core._is_dygraph_debug_enabled():
         logging.warn(
             'Debug mode is not enabled. Please set FLAGS_dygraph_debug=1 to enable debug'
         )
         return
     unique_name_size = len(framework.unique_name.generator.ids)
-    tracer_var_size = len(framework._dygraph_tracer()._vars)
+    tracer_var_size = len(parameter_list)
     alive_cpp_var_size = len(core.VarBase._alive_vars())
     if not is_test:
         logging.warn(
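_print_debug_msg can no longer read a variable count off the tracer, so callers pass the parameters they care about. A usage sketch mirroring the updated tests below:

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        mlp = fluid.dygraph.Linear(10, 10)
        fluid.dygraph.base._print_debug_msg(mlp.parameters(), is_test=True)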

@@ -53,7 +53,8 @@ def save_dygraph(state_dict, model_path):
             state_dict = emb.state_dict()
             fluid.save_dygraph( state_dict, "paddle_dy")

-            adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
+            adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
+                                         parameter_list = emb.parameters() )
             state_dict = adam.state_dict()
             fluid.save_dygraph( state_dict, "paddle_dy")

@@ -96,7 +97,8 @@ def load_dygraph(model_path):
             state_dict = emb.state_dict()
             fluid.save_dygraph( state_dict, "paddle_dy")

-            adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) )
+            adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
+                                         parameter_list = emb.parameters() )
             state_dict = adam.state_dict()
             fluid.save_dygraph( state_dict, "padle_dy")
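The docstring examples above now build the Adam optimizer with an explicit parameter_list. A runnable version of the round trip they describe (assuming the usual 1.7 behavior that load_dygraph returns the parameter and optimizer state dicts as a pair):

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding([10, 10])
        fluid.save_dygraph(emb.state_dict(), "paddle_dy")
        adam = fluid.optimizer.Adam(
            learning_rate=fluid.layers.noam_decay(100, 10000),
            parameter_list=emb.parameters())
        fluid.save_dygraph(adam.state_dict(), "paddle_dy")
        para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy")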

@@ -145,9 +145,13 @@ class Layer(core.Layer):
             list of :ref:`api_guide_Variable_en` : a list of Parameters.
         """
         ret = [p for p in self._parameters.values()]
+        parameters_set = set(ret)
         if include_sublayers:
             for l in self._sub_layers.values():
                 for p in l.parameters(include_sublayers):
+                    if p in parameters_set:
+                        continue
+                    parameters_set.add(p)
                     ret.append(p)
         return ret
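With the duplicate check above, a parameter reachable through several sublayers is reported once. A small sketch of the effect (assuming Linear exposes its weight as .weight in this release, so two layers can be tied):

    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Linear

    class TiedMLP(fluid.dygraph.Layer):
        def __init__(self):
            super(TiedMLP, self).__init__()
            self.fc_a = Linear(4, 4)
            self.fc_b = Linear(4, 4)
            self.fc_b.weight = self.fc_a.weight  # share one Parameter

    with fluid.dygraph.guard():
        m = TiedMLP()
        # one shared weight + two biases, not four entries
        assert len(m.parameters()) == 3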
@@ -261,11 +265,6 @@ class Layer(core.Layer):
                     value.set_value(self._loaddict_holder[value.name])

-                if name in params:
-                    # remove unused param in tracer
-                    if framework._dygraph_tracer_ is not None:
-                        framework._dygraph_tracer_._vars.pop(params[name].name,
-                                                             None)
                 params[name] = value
             elif isinstance(value, core.Layer):
                 layers = self.__dict__.get('_sub_layers', None)

@@ -104,8 +104,10 @@ class PiecewiseDecay(LearningRateDecay):
          boundaries = [10000, 20000]
          values = [1.0, 0.5, 0.1]
          with fluid.dygraph.guard():
+             emb = fluid.dygraph.Embedding( [10, 10] )
              optimizer = fluid.optimizer.SGD(
-                 learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0) )
+                 learning_rate=fluid.dygraph.PiecewiseDecay(boundaries, values, 0),
+                 parameter_list = emb.parameters() )
    """

    def __init__(self, boundaries, values, begin, step=1, dtype='float32'):

@@ -323,12 +325,14 @@ class InverseTimeDecay(LearningRateDecay):
          import paddle.fluid as fluid
          base_lr = 0.1
          with fluid.dygraph.guard():
+             emb = fluid.dygraph.Embedding([10, 10])
              sgd_optimizer = fluid.optimizer.SGD(
                  learning_rate=fluid.dygraph.InverseTimeDecay(
                      learning_rate=base_lr,
                      decay_steps=10000,
                      decay_rate=0.5,
-                     staircase=True))
+                     staircase=True),
+                 parameter_list = emb.parameters())
    """

@@ -404,9 +408,11 @@ class PolynomialDecay(LearningRateDecay):
          total_step = 5000
          end_lr = 0
          with fluid.dygraph.guard():
+             emb = fluid.dygraph.Embedding( [10, 10])
              optimizer = fluid.optimizer.SGD(
                  learning_rate = fluid.dygraph.PolynomialDecay(
-                     start_lr, total_step, end_lr, power=1.0) )
+                     start_lr, total_step, end_lr, power=1.0),
+                 parameter_list = emb.parameters())
    """

@@ -536,10 +542,12 @@ class NoamDecay(LearningRateDecay):
          warmup_steps = 100
          learning_rate = 0.01
          with fluid.dygraph.guard():
+             emb = fluid.dygraph.Embedding([10, 10])
              optimizer = fluid.optimizer.SGD(
                  learning_rate = fluid.dygraph.NoamDecay(
                      1/(warmup_steps *(learning_rate ** 2)),
-                     warmup_steps) )
+                     warmup_steps),
+                 parameter_list = emb.parameters())
    """

    def __init__(self, d_model, warmup_steps, begin=1, step=1, dtype='float32'):

@@ -31,16 +31,8 @@ class Tracer(core.Tracer):
    def __init__(self):
        super(Tracer, self).__init__()

-        self._vars = defaultdict()
        self._train_mode = True

-    def trace_var(self, name, var):
-        self._vars[name] = var
-
-    def all_parameters(self):
-        return list((item for name, item in six.iteritems(self._vars)
-                     if isinstance(item, framework.Parameter)))
-
    def trace_op(self, type, inputs, outputs, attrs, stop_gradient=False):
        self.trace(type, inputs, outputs, attrs,
                   framework._current_expected_place(), self._train_mode and
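With _vars, trace_var and all_parameters gone, parameter discovery belongs to the Layer rather than the global tracer. The replacement call, in a sketch:

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        model = fluid.dygraph.Linear(3, 3)
        # formerly framework._dygraph_tracer().all_parameters()
        params = model.parameters()
        print([p.name for p in params])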

@@ -4676,8 +4676,6 @@ class ParamBase(core.VarBase):
        # self.block = default_main_program().global_block()

-        _dygraph_tracer().trace_var(name, self)
-
    def __str__(self):
        return self.to_string(True)

File diff suppressed because it is too large

@@ -26,7 +26,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph as dygraph
 from paddle.fluid import core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from paddle.fluid.dygraph.base import to_variable
 from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase

@@ -79,8 +79,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
 class MNIST(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(MNIST, self).__init__(name_scope)
+    def __init__(self):
+        super(MNIST, self).__init__()

         self._simple_img_conv_pool_1 = SimpleImgConvPool(
             1, 20, 5, 2, 2, act="relu")

@@ -88,19 +88,21 @@ class MNIST(fluid.dygraph.Layer):
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
             20, 50, 5, 2, 2, act="relu")

-        pool_2_shape = 50 * 4 * 4
+        self.pool_2_shape = 50 * 4 * 4
         SIZE = 10
-        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-        self._fc = FC(self.full_name(),
-                      10,
-                      param_attr=fluid.param_attr.ParamAttr(
-                          initializer=fluid.initializer.NormalInitializer(
-                              loc=0.0, scale=scale)),
-                      act="softmax")
+        scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
+        self._fc = Linear(
+            self.pool_2_shape,
+            10,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.NormalInitializer(
+                    loc=0.0, scale=scale)),
+            act="softmax")

     def forward(self, inputs, label):
         x = self._simple_img_conv_pool_1(inputs)
         x = self._simple_img_conv_pool_2(x)
+        x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
         cost = self._fc(x)
         loss = fluid.layers.cross_entropy(cost, label)
         avg_loss = fluid.layers.mean(loss)

@@ -109,10 +111,11 @@ class MNIST(fluid.dygraph.Layer):
 class TestMnist(TestParallelDyGraphRunnerBase):
     def get_model(self):
-        model = MNIST("mnist")
+        model = MNIST()
         train_reader = paddle.batch(
             paddle.dataset.mnist.train(), batch_size=2, drop_last=True)
-        opt = fluid.optimizer.Adam(learning_rate=1e-3)
+        opt = fluid.optimizer.Adam(
+            learning_rate=1e-3, parameter_list=model.parameters())
         return model, train_reader, opt

     def run_one_loop(self, model, opt, data):
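The FC-to-Linear migration above recurs in every test file below: FC flattened trailing input dimensions itself (it even had a num_flatten_dims knob), while Linear expects a [batch, in_features] tensor, hence the stored self.pool_2_shape and the explicit reshape in forward. The pattern in isolation, with illustrative data:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Linear, to_variable

    with fluid.dygraph.guard():
        pool_2_shape = 50 * 4 * 4
        fc = Linear(pool_2_shape, 10, act="softmax")
        x = to_variable(np.random.rand(2, 50, 4, 4).astype('float32'))
        x = fluid.layers.reshape(x, shape=[-1, pool_2_shape])  # caller flattens now
        out = fc(x)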

@@ -27,7 +27,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph as dygraph
 from paddle.fluid import core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, BatchNorm
 from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.layer_helper import LayerHelper
 import math

@@ -54,7 +54,7 @@ train_parameters = {
 }

-def optimizer_setting(params):
+def optimizer_setting(params, parameter_list=None):
     ls = params["learning_strategy"]
     if "total_images" not in params:
         total_images = 6149

@@ -66,11 +66,19 @@ def optimizer_setting(params):
         bd = [step * e for e in ls["epochs"]]
         lr = params["lr"]
         num_epochs = params["num_epochs"]
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=fluid.layers.cosine_decay(
-                learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
-            momentum=momentum_rate,
-            regularization=fluid.regularizer.L2Decay(l2_decay))
+        if fluid.in_dygraph_mode():
+            optimizer = fluid.optimizer.Momentum(
+                learning_rate=fluid.layers.cosine_decay(
+                    learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
+                momentum=momentum_rate,
+                regularization=fluid.regularizer.L2Decay(l2_decay),
+                parameter_list=parameter_list)
+        else:
+            optimizer = fluid.optimizer.Momentum(
+                learning_rate=fluid.layers.cosine_decay(
+                    learning_rate=lr, step_each_epoch=step, epochs=num_epochs),
+                momentum=momentum_rate,
+                regularization=fluid.regularizer.L2Decay(l2_decay))

     return optimizer

@@ -107,27 +115,29 @@ class ConvBNLayer(fluid.dygraph.Layer):
 class SqueezeExcitation(fluid.dygraph.Layer):
-    def __init__(self, name_scope, num_channels, reduction_ratio):
-        super(SqueezeExcitation, self).__init__(name_scope)
+    def __init__(self, num_channels, reduction_ratio):
+        super(SqueezeExcitation, self).__init__()
+        self._num_channels = num_channels
         self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
         stdv = 1.0 / math.sqrt(num_channels * 1.0)
-        self._squeeze = FC(
-            self.full_name(),
-            size=num_channels // reduction_ratio,
+        self._squeeze = Linear(
+            num_channels,
+            num_channels // reduction_ratio,
             param_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Uniform(-stdv, stdv)),
             act='relu')
         stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0)
-        self._excitation = FC(
-            self.full_name(),
-            size=num_channels,
+        self._excitation = Linear(
+            num_channels // reduction_ratio,
+            num_channels,
             param_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Uniform(-stdv, stdv)),
             act='sigmoid')

     def forward(self, input):
         y = self._pool(input)
+        y = fluid.layers.reshape(y, shape=[-1, self._num_channels])
         y = self._squeeze(y)
         y = self._excitation(y)
         y = fluid.layers.elementwise_mul(x=input, y=y, axis=0)

@@ -163,9 +173,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
             act=None)

         self.scale = SqueezeExcitation(
-            self.full_name(),
-            num_channels=num_filters * 2,
-            reduction_ratio=reduction_ratio)
+            num_channels=num_filters * 2, reduction_ratio=reduction_ratio)

         if not shortcut:
             self.short = ConvBNLayer(

@@ -194,8 +202,8 @@ class BottleneckBlock(fluid.dygraph.Layer):
 class SeResNeXt(fluid.dygraph.Layer):
-    def __init__(self, name_scope, layers=50, class_dim=102):
-        super(SeResNeXt, self).__init__(name_scope)
+    def __init__(self, layers=50, class_dim=102):
+        super(SeResNeXt, self).__init__()

         self.layers = layers
         supported_layers = [50, 101, 152]

@@ -276,10 +284,13 @@ class SeResNeXt(fluid.dygraph.Layer):
             pool_size=7, pool_type='avg', global_pooling=True)
         stdv = 1.0 / math.sqrt(2048 * 1.0)

-        self.out = FC(self.full_name(),
-                      size=class_dim,
-                      param_attr=fluid.param_attr.ParamAttr(
-                          initializer=fluid.initializer.Uniform(-stdv, stdv)))
+        self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1
+
+        self.out = Linear(
+            self.pool2d_avg_output,
+            class_dim,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)))

     def forward(self, inputs):
         if self.layers == 50 or self.layers == 101:

@@ -294,18 +305,20 @@ class SeResNeXt(fluid.dygraph.Layer):
         for bottleneck_block in self.bottleneck_block_list:
             y = bottleneck_block(y)
         y = self.pool2d_avg(y)
+        y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
         y = self.out(y)
         return y


 class TestSeResNeXt(TestParallelDyGraphRunnerBase):
     def get_model(self):
-        model = SeResNeXt("se-resnext")
+        model = SeResNeXt()
         train_reader = paddle.batch(
             paddle.dataset.flowers.test(use_xmap=False),
             batch_size=train_parameters["batch_size"],
             drop_last=True)
-        optimizer = optimizer_setting(train_parameters)
+        optimizer = optimizer_setting(
+            train_parameters, parameter_list=model.parameters())
         return model, train_reader, optimizer

     def run_one_loop(self, model, opt, data):
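optimizer_setting now branches on execution mode, so each caller decides whether to hand over a parameter list. In the test's own terms:

    # dygraph: the model's parameters are passed through explicitly
    opt = optimizer_setting(train_parameters, parameter_list=model.parameters())
    # static graph: parameter_list stays None and minimize() resolves
    # the parameters from the program instead
    opt = optimizer_setting(train_parameters)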

@@ -23,7 +23,7 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from paddle.fluid.dygraph.base import to_variable
 from test_imperative_base import new_program_scope

@@ -75,8 +75,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
 class MNIST(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(MNIST, self).__init__(name_scope)
+    def __init__(self):
+        super(MNIST, self).__init__()

         self._simple_img_conv_pool_1 = SimpleImgConvPool(
             1, 20, 5, 2, 2, act="relu")

@@ -84,19 +84,21 @@ class MNIST(fluid.dygraph.Layer):
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
             20, 50, 5, 2, 2, act="relu")

-        pool_2_shape = 50 * 4 * 4
-        SIZE = 10
-        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-        self._fc = FC(self.full_name(),
-                      10,
-                      param_attr=fluid.param_attr.ParamAttr(
-                          initializer=fluid.initializer.NormalInitializer(
-                              loc=0.0, scale=scale)),
-                      act="softmax")
+        self.pool_2_shape = 50 * 4 * 4
+        SIZE = 100  #10
+        scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
+        self._fc = Linear(
+            self.pool_2_shape,
+            SIZE,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.NormalInitializer(
+                    loc=0.0, scale=scale)),
+            act="softmax")

     def forward(self, inputs):
         x = self._simple_img_conv_pool_1(inputs)
         x = self._simple_img_conv_pool_2(x)
+        x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
         x = self._fc(x)
         return x

@@ -109,8 +111,9 @@ class TestDygraphMultiForward(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed

-            mnist = MNIST("mnist")
-            sgd = SGDOptimizer(learning_rate=1e-3)
+            mnist = MNIST()
+            sgd = SGDOptimizer(
+                learning_rate=1e-3, parameter_list=mnist.parameters())
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

@@ -145,7 +148,7 @@ class TestDygraphMultiForward(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

-            mnist = MNIST("mnist")
+            mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

@@ -258,7 +258,9 @@ class TestImperativeAutoPrune(unittest.TestCase):
             fc2_origin = fc2._w.numpy()
             fc2._w.stop_gradient = True
             out2.backward()
-            optimizer = fluid.optimizer.SGD(learning_rate=0.003)
+            optimizer = fluid.optimizer.SGD(
+                learning_rate=0.003,
+                parameter_list=(fc.parameters() + fc2.parameters()))
             optimizer.minimize(out2)
             self.assertTrue(np.array_equal(fc2_origin, fc2._w.numpy()))
             self.assertFalse(np.array_equal(fc_origin, fc._w.numpy()))

@@ -279,7 +281,9 @@ class TestImperativeAutoPrune(unittest.TestCase):
             fc2_origin = fc2._w.numpy()
             out2.stop_gradient = True
             out2.backward()
-            optimizer = fluid.optimizer.SGD(learning_rate=0.003)
+            optimizer = fluid.optimizer.SGD(
+                learning_rate=0.003,
+                parameter_list=(fc.parameters() + fc2.parameters()))
             optimizer.minimize(out2)
             self.assertTrue(np.array_equal(fc2_origin, fc2._w.numpy()))
             self.assertTrue(np.array_equal(fc_origin, fc._w.numpy()))

@@ -320,7 +324,8 @@ class TestImperativeAutoPrune(unittest.TestCase):
         place = fluid.CPUPlace()
         with fluid.dygraph.guard(place):
             model = MyLayer("mylayer", vocab_size, size)
-            optimizer = fluid.optimizer.AdamOptimizer(0.001)
+            optimizer = fluid.optimizer.AdamOptimizer(
+                0.001, parameter_list=model.parameters())
             grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001)

             indices = fluid.dygraph.to_variable(indices)

@@ -338,7 +343,8 @@ class TestImperativeAutoPrune(unittest.TestCase):
         with fluid.dygraph.guard(place):
             model = MyLayer2("mylayer", vocab_size, size)
-            optimizer = fluid.optimizer.AdamOptimizer(0.001)
+            optimizer = fluid.optimizer.AdamOptimizer(
+                0.001, parameter_list=model.parameters())
             grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(0.001)

             indices = fluid.dygraph.to_variable(indices)

@@ -58,7 +58,7 @@ class TestDygraphDebugString(unittest.TestCase):
                 out.backward()
                 mlp.clear_gradients()
                 unique_name_tmp, trace_var_tmp, alive_var_tmp = fluid.dygraph.base._print_debug_msg(
-                    is_test=True)
+                    mlp.parameters(), is_test=True)
                 if i > 0:
                     self.assertGreaterEqual(unique_name, unique_name_tmp)
                     self.assertGreaterEqual(trace_var, trace_var_tmp)

@@ -68,7 +68,7 @@ class TestDygraphDebugString(unittest.TestCase):
                     trace_var = trace_var_tmp
                     alive_var = alive_var_tmp
             try:
-                fluid.dygraph.base._print_debug_msg()
+                fluid.dygraph.base._print_debug_msg(mlp.parameters())
             except Exception as e:
                 raise RuntimeError(
                     "No Exception is accepted in _print_debug_msg, but we got: {}".

@@ -23,6 +23,7 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from test_imperative_base import new_program_scope
 from paddle.fluid.dygraph.base import to_variable
+from paddle.fluid.dygraph import Linear

 # Can use Amusic dataset as the DeepCF describes.
 DATA_PATH = os.environ.get('DATA_PATH', '')

@@ -33,10 +34,10 @@ NUM_EPOCHES = int(os.environ.get('NUM_EPOCHES', 1))
 class DMF(fluid.Layer):
-    def __init__(self, name_scope):
-        super(DMF, self).__init__(name_scope)
-        self._user_latent = fluid.FC(self.full_name(), 256)
-        self._item_latent = fluid.FC(self.full_name(), 256)
+    def __init__(self):
+        super(DMF, self).__init__()
+        self._user_latent = Linear(1000, 256)
+        self._item_latent = Linear(100, 256)

         self._user_layers = []
         self._item_layers = []

@@ -45,11 +46,17 @@ class DMF(fluid.Layer):
             self._user_layers.append(
                 self.add_sublayer(
                     'user_layer_%d' % i,
-                    fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
+                    Linear(
+                        256 if i == 0 else self._hid_sizes[i - 1],
+                        self._hid_sizes[i],
+                        act='relu')))
             self._item_layers.append(
                 self.add_sublayer(
                     'item_layer_%d' % i,
-                    fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
+                    Linear(
+                        256 if i == 0 else self._hid_sizes[i - 1],
+                        self._hid_sizes[i],
+                        act='relu')))

     def forward(self, users, items):
         users = self._user_latent(users)

@@ -62,17 +69,20 @@ class DMF(fluid.Layer):
 class MLP(fluid.Layer):
-    def __init__(self, name_scope):
-        super(MLP, self).__init__(name_scope)
-        self._user_latent = fluid.FC(self.full_name(), 256)
-        self._item_latent = fluid.FC(self.full_name(), 256)
+    def __init__(self):
+        super(MLP, self).__init__()
+        self._user_latent = Linear(1000, 256)
+        self._item_latent = Linear(100, 256)
         self._match_layers = []
         self._hid_sizes = [128, 64]
         for i in range(len(self._hid_sizes)):
             self._match_layers.append(
                 self.add_sublayer(
                     'match_layer_%d' % i,
-                    fluid.FC(self.full_name(), self._hid_sizes[i], act='relu')))
+                    Linear(
+                        256 * 2 if i == 0 else self._hid_sizes[i - 1],
+                        self._hid_sizes[i],
+                        act='relu')))

     def forward(self, users, items):
         users = self._user_latent(users)

@@ -85,8 +95,8 @@ class MLP(fluid.Layer):
 class DeepCF(fluid.Layer):
-    def __init__(self, name_scope, num_users, num_items, matrix):
-        super(DeepCF, self).__init__(name_scope)
+    def __init__(self, num_users, num_items, matrix):
+        super(DeepCF, self).__init__()
         self._num_users = num_users
         self._num_items = num_items
         self._rating_matrix = self.create_parameter(

@@ -97,9 +107,9 @@ class DeepCF(fluid.Layer):
             default_initializer=fluid.initializer.NumpyArrayInitializer(matrix))
         self._rating_matrix.stop_gradient = True

-        self._mlp = MLP(self.full_name())
-        self._dmf = DMF(self.full_name())
-        self._match_fc = fluid.FC(self.full_name(), 1, act='sigmoid')
+        self._mlp = MLP()
+        self._dmf = DMF()
+        self._match_fc = Linear(128, 1, act='sigmoid')

     def forward(self, users, items):
         # users_emb = self._user_emb(users)

@@ -208,7 +218,7 @@ class TestDygraphDeepCF(unittest.TestCase):
             items = fluid.layers.data('items', [1], dtype='int32')
             labels = fluid.layers.data('labels', [1], dtype='float32')

-            deepcf = DeepCF('deepcf', num_users, num_items, matrix)
+            deepcf = DeepCF(num_users, num_items, matrix)
             prediction = deepcf(users, items)
             loss = fluid.layers.reduce_sum(
                 fluid.layers.log_loss(prediction, labels))

@@ -237,8 +247,9 @@ class TestDygraphDeepCF(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed

-            deepcf = DeepCF('deepcf', num_users, num_items, matrix)
-            adam = fluid.optimizer.AdamOptimizer(0.01)
+            deepcf = DeepCF(num_users, num_items, matrix)
+            adam = fluid.optimizer.AdamOptimizer(
+                0.01, parameter_list=deepcf.parameters())
             for e in range(NUM_EPOCHES):
                 sys.stderr.write('epoch %d\n' % e)
                 for slice in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):

@@ -261,8 +272,9 @@ class TestDygraphDeepCF(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed

-            deepcf2 = DeepCF('deepcf', num_users, num_items, matrix)
-            adam2 = fluid.optimizer.AdamOptimizer(0.01)
+            deepcf2 = DeepCF(num_users, num_items, matrix)
+            adam2 = fluid.optimizer.AdamOptimizer(
+                0.01, parameter_list=deepcf2.parameters())
             backward_strategy = fluid.dygraph.BackwardStrategy()
             backward_strategy.sort_sum_gradient = True
             for e in range(NUM_EPOCHES):

@@ -22,33 +22,35 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid import Conv2D, Pool2D, FC
+from paddle.fluid import Conv2D, Pool2D, Linear
 from test_imperative_base import new_program_scope
 from paddle.fluid.dygraph.base import to_variable


 class Discriminator(fluid.Layer):
-    def __init__(self, name_scope):
-        super(Discriminator, self).__init__(name_scope)
-        self._fc1 = FC(self.full_name(), size=32, act='elu')
-        self._fc2 = FC(self.full_name(), size=1)
+    def __init__(self):
+        super(Discriminator, self).__init__()
+        self._fc1 = Linear(1, 32, act='elu')
+        self._fc2 = Linear(32, 1)

     def forward(self, inputs):
         x = self._fc1(inputs)
-        return self._fc2(x)
+        x = self._fc2(x)
+        return x


 class Generator(fluid.Layer):
-    def __init__(self, name_scope):
-        super(Generator, self).__init__(name_scope)
-        self._fc1 = FC(self.full_name(), size=64, act='elu')
-        self._fc2 = FC(self.full_name(), size=64, act='elu')
-        self._fc3 = FC(self.full_name(), size=1)
+    def __init__(self):
+        super(Generator, self).__init__()
+        self._fc1 = Linear(2, 64, act='elu')
+        self._fc2 = Linear(64, 64, act='elu')
+        self._fc3 = Linear(64, 1)

     def forward(self, inputs):
         x = self._fc1(inputs)
         x = self._fc2(x)
-        return self._fc3(x)
+        x = self._fc3(x)
+        return x


 class TestDygraphGAN(unittest.TestCase):

@@ -65,8 +67,8 @@ class TestDygraphGAN(unittest.TestCase):
         scope = fluid.core.Scope()
         with new_program_scope(
                 main=discriminate_p, startup=startup, scope=scope):
-            discriminator = Discriminator("d")
-            generator = Generator("g")
+            discriminator = Discriminator()
+            generator = Generator()

             img = fluid.layers.data(
                 name="img", shape=[2, 1], append_batch_size=False)

@@ -93,8 +95,8 @@ class TestDygraphGAN(unittest.TestCase):
             sgd.minimize(d_loss)

         with new_program_scope(main=generate_p, startup=startup, scope=scope):
-            discriminator = Discriminator("d")
-            generator = Generator("g")
+            discriminator = Discriminator()
+            generator = Generator()

             noise = fluid.layers.data(
                 name="noise", shape=[2, 2], append_batch_size=False)

@@ -134,9 +136,12 @@ class TestDygraphGAN(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed

-            discriminator = Discriminator("d")
-            generator = Generator("g")
-            sgd = SGDOptimizer(learning_rate=1e-3)
+            discriminator = Discriminator()
+            generator = Generator()
+            sgd = SGDOptimizer(
+                learning_rate=1e-3,
+                parameter_list=(
+                    discriminator.parameters() + generator.parameters()))

             d_real = discriminator(to_variable(np.ones([2, 1], np.float32)))
             d_loss_real = fluid.layers.reduce_mean(

@@ -177,9 +182,12 @@ class TestDygraphGAN(unittest.TestCase):
             backward_strategy = fluid.dygraph.BackwardStrategy()
             backward_strategy.sort_sum_gradient = True

-            discriminator2 = Discriminator("d")
-            generator2 = Generator("g")
-            sgd2 = SGDOptimizer(learning_rate=1e-3)
+            discriminator2 = Discriminator()
+            generator2 = Generator()
+            sgd2 = SGDOptimizer(
+                learning_rate=1e-3,
+                parameter_list=(
+                    discriminator2.parameters() + generator2.parameters()))

             d_real2 = discriminator2(to_variable(np.ones([2, 1], np.float32)))
             d_loss_real2 = fluid.layers.reduce_mean(
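One optimizer can still drive several models: parameter_list is an ordinary Python list, so the GAN test simply concatenates the two layers' parameters. In isolation:

    params = discriminator.parameters() + generator.parameters()
    sgd = SGDOptimizer(learning_rate=1e-3, parameter_list=params)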

@@ -131,7 +131,8 @@ class TestDygraphGNN(unittest.TestCase):
                 to_variable(labels))
             loss = fluid.layers.reduce_sum(loss)
             loss.backward()
-            adam = AdamOptimizer(learning_rate=1e-3)
+            adam = AdamOptimizer(
+                learning_rate=1e-3, parameter_list=model.parameters())

             adam.minimize(loss)
             model.clear_gradients()

@@ -156,7 +157,8 @@ class TestDygraphGNN(unittest.TestCase):
                 logits2, to_variable(labels2))
             loss2 = fluid.layers.reduce_sum(loss2)
             loss2.backward()
-            adam2 = AdamOptimizer(learning_rate=1e-3)
+            adam2 = AdamOptimizer(
+                learning_rate=1e-3, parameter_list=model2.parameters())
             adam2.minimize(loss2)
             model2.clear_gradients()
             loss2_value = loss2.numpy()

@@ -105,7 +105,9 @@ class TestDygraphSimpleNet(unittest.TestCase):
                     is_sparse=is_sparse,
                     dtype=dtype)

-                sgd = SGDOptimizer(learning_rate=1e-3)
+                sgd = SGDOptimizer(
+                    learning_rate=1e-3,
+                    parameter_list=simple_net.parameters())
                 dy_param_updated = dict()
                 dy_param_init = dict()
                 dy_loss = None

@@ -23,7 +23,7 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from paddle.fluid.dygraph.base import to_variable
 from test_imperative_base import new_program_scope
 from utils import DyGraphProgramDescTracerTestHelper, is_equal_program

@@ -77,8 +77,8 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
 class MNIST(fluid.dygraph.Layer):
-    def __init__(self, name_scope):
-        super(MNIST, self).__init__(name_scope)
+    def __init__(self):
+        super(MNIST, self).__init__()

         self._simple_img_conv_pool_1 = SimpleImgConvPool(
             1, 20, 5, 2, 2, act="relu")

@@ -86,19 +86,21 @@ class MNIST(fluid.dygraph.Layer):
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
             20, 50, 5, 2, 2, act="relu")

-        pool_2_shape = 50 * 4 * 4
+        self.pool_2_shape = 50 * 4 * 4
         SIZE = 10
-        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-        self._fc = FC(self.full_name(),
-                      10,
-                      param_attr=fluid.param_attr.ParamAttr(
-                          initializer=fluid.initializer.NormalInitializer(
-                              loc=0.0, scale=scale)),
-                      act="softmax")
+        scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
+        self._fc = Linear(
+            self.pool_2_shape,
+            10,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.NormalInitializer(
+                    loc=0.0, scale=scale)),
+            act="softmax")

     def forward(self, inputs):
         x = self._simple_img_conv_pool_1(inputs)
         x = self._simple_img_conv_pool_2(x)
+        x = fluid.layers.reshape(x, shape=[-1, self.pool_2_shape])
         x = self._fc(x)
         return x

@@ -125,8 +127,9 @@ class TestImperativeMnist(unittest.TestCase):
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed

-            mnist = MNIST("mnist")
-            sgd = SGDOptimizer(learning_rate=1e-3)
+            mnist = MNIST()
+            sgd = SGDOptimizer(
+                learning_rate=1e-3, parameter_list=mnist.parameters())

             batch_py_reader = fluid.io.PyReader(capacity=1)
             batch_py_reader.decorate_sample_list_generator(

@@ -189,7 +192,7 @@ class TestImperativeMnist(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

-            mnist = MNIST("mnist")
+            mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(),

@@ -39,8 +39,9 @@ class TestImperativeMnistSortGradient(unittest.TestCase):
             backward_strategy = fluid.dygraph.BackwardStrategy()
             backward_strategy.sort_sum_gradient = True

-            mnist2 = MNIST("mnist")
-            sgd2 = SGDOptimizer(learning_rate=1e-3)
+            mnist2 = MNIST()
+            sgd2 = SGDOptimizer(
+                learning_rate=1e-3, parameter_list=mnist2.parameters())
             train_reader2 = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

@@ -85,7 +86,7 @@ class TestImperativeMnistSortGradient(unittest.TestCase):
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

-            mnist = MNIST("mnist")
+            mnist = MNIST()
             sgd = SGDOptimizer(learning_rate=1e-3)
             train_reader = paddle.batch(
                 paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

@@ -18,7 +18,7 @@ import numpy as np
 import six
 import paddle.fluid as fluid
 from paddle.fluid import core
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm, Embedding, GRUUnit
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, BatchNorm, Embedding, GRUUnit
 from paddle.fluid.dygraph.base import to_variable
 from test_imperative_base import new_program_scope

@@ -27,6 +27,8 @@ class Config(object):
     '''
     config for training
     '''
+    # encoder rnn hidden_size
+    encoder_size = 200
     # decoder size for decoder stage
     decoder_size = 128
     # size for word embedding

@@ -118,8 +120,8 @@ class ConvBNPool(fluid.dygraph.Layer):
 class OCRConv(fluid.dygraph.Layer):
-    def __init__(self, name_scope, is_test=False, use_cudnn=True):
-        super(OCRConv, self).__init__(name_scope)
+    def __init__(self, is_test=False, use_cudnn=True):
+        super(OCRConv, self).__init__()
         self.conv_bn_pool_1 = ConvBNPool(
             2, [16, 16], [1, 16], is_test=is_test, use_cudnn=use_cudnn)
         self.conv_bn_pool_2 = ConvBNPool(

@@ -143,7 +145,6 @@ class OCRConv(fluid.dygraph.Layer):
 class DynamicGRU(fluid.dygraph.Layer):
     def __init__(self,
-                 scope_name,
                  size,
                  param_attr=None,
                  bias_attr=None,

@@ -152,7 +153,7 @@ class DynamicGRU(fluid.dygraph.Layer):
                  candidate_activation='tanh',
                  h_0=None,
                  origin_mode=False):
-        super(DynamicGRU, self).__init__(scope_name)
+        super(DynamicGRU, self).__init__()

         self.gru_unit = GRUUnit(
             size * 3,

@@ -164,6 +165,7 @@ class DynamicGRU(fluid.dygraph.Layer):
         self.h_0 = h_0
         self.is_reverse = is_reverse
+        self.size = size

     def forward(self, inputs):
         hidden = self.h_0

@@ -188,11 +190,10 @@ class DynamicGRU(fluid.dygraph.Layer):
 class EncoderNet(fluid.dygraph.Layer):
     def __init__(self,
-                 scope_name,
-                 rnn_hidden_size=200,
+                 rnn_hidden_size=Config.encoder_size,
                  is_test=False,
                  use_cudnn=True):
-        super(EncoderNet, self).__init__(scope_name)
+        super(EncoderNet, self).__init__()
         self.rnn_hidden_size = rnn_hidden_size
         para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0,
                                                                          0.02))

@@ -207,28 +208,19 @@ class EncoderNet(fluid.dygraph.Layer):
             shape=[Config.batch_size, rnn_hidden_size],
             dtype='float32',
             value=0)
-        self.ocr_convs = OCRConv(
-            self.full_name(), is_test=is_test, use_cudnn=use_cudnn)
-
-        self.fc_1_layer = FC(self.full_name(),
-                             rnn_hidden_size * 3,
-                             param_attr=para_attr,
-                             bias_attr=False,
-                             num_flatten_dims=2)
-        self.fc_2_layer = FC(self.full_name(),
-                             rnn_hidden_size * 3,
-                             param_attr=para_attr,
-                             bias_attr=False,
-                             num_flatten_dims=2)
+        self.ocr_convs = OCRConv(is_test=is_test, use_cudnn=use_cudnn)
+
+        self.fc_1_layer = Linear(
+            768, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False)
+        self.fc_2_layer = Linear(
+            768, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False)
         self.gru_forward_layer = DynamicGRU(
-            self.full_name(),
             size=rnn_hidden_size,
             h_0=h_0,
             param_attr=para_attr,
             bias_attr=bias_attr,
             candidate_activation='relu')
         self.gru_backward_layer = DynamicGRU(
-            self.full_name(),
             size=rnn_hidden_size,
             h_0=h_0,
             param_attr=para_attr,

@@ -236,10 +228,8 @@ class EncoderNet(fluid.dygraph.Layer):
             candidate_activation='relu',
             is_reverse=True)

-        self.encoded_proj_fc = FC(self.full_name(),
-                                  Config.decoder_size,
-                                  bias_attr=False,
-                                  num_flatten_dims=2)
+        self.encoded_proj_fc = Linear(
+            rnn_hidden_size * 2, Config.decoder_size, bias_attr=False)

     def forward(self, inputs):
         conv_features = self.ocr_convs(inputs)

@@ -272,18 +262,12 @@ class EncoderNet(fluid.dygraph.Layer):
 class SimpleAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name, decoder_size):
-        super(SimpleAttention, self).__init__(scope_name)
-
-        self.fc_1 = FC(self.full_name(),
-                       decoder_size,
-                       act=None,
-                       bias_attr=False)
-        self.fc_2 = FC(self.full_name(),
-                       1,
-                       num_flatten_dims=2,
-                       act=None,
-                       bias_attr=False)
+    def __init__(self, decoder_size):
+        super(SimpleAttention, self).__init__()
+
+        self.fc_1 = Linear(
+            decoder_size, decoder_size, act=None, bias_attr=False)
+        self.fc_2 = Linear(decoder_size, 1, act=None, bias_attr=False)

     def forward(self, encoder_vec, encoder_proj, decoder_state):

@@ -311,22 +295,18 @@ class SimpleAttention(fluid.dygraph.Layer):
 class GRUDecoderWithAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name, decoder_size, num_classes):
-        super(GRUDecoderWithAttention, self).__init__(scope_name)
-        self.simple_attention = SimpleAttention(self.full_name(), decoder_size)
-
-        self.fc_1_layer = FC(self.full_name(),
-                             size=decoder_size * 3,
-                             bias_attr=False)
-        self.fc_2_layer = FC(self.full_name(),
-                             size=decoder_size * 3,
-                             bias_attr=False)
+    def __init__(self, decoder_size, num_classes):
+        super(GRUDecoderWithAttention, self).__init__()
+        self.simple_attention = SimpleAttention(decoder_size)
+
+        self.fc_1_layer = Linear(
+            Config.encoder_size * 2, decoder_size * 3, bias_attr=False)
+        self.fc_2_layer = Linear(
+            decoder_size, decoder_size * 3, bias_attr=False)
         self.gru_unit = GRUUnit(
             size=decoder_size * 3, param_attr=None, bias_attr=None)
-        self.out_layer = FC(self.full_name(),
-                            size=num_classes + 2,
-                            bias_attr=None,
-                            act='softmax')
+        self.out_layer = Linear(
+            decoder_size, num_classes + 2, bias_attr=None, act='softmax')

         self.decoder_size = decoder_size

@@ -357,17 +337,18 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer):
 class OCRAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name):
-        super(OCRAttention, self).__init__(scope_name)
-        self.encoder_net = EncoderNet(self.full_name())
-        self.fc = FC(self.full_name(),
-                     size=Config.decoder_size,
-                     bias_attr=False,
-                     act='relu')
+    def __init__(self):
+        super(OCRAttention, self).__init__()
+        self.encoder_net = EncoderNet()
+        self.fc = Linear(
+            Config.encoder_size,
+            Config.decoder_size,
+            bias_attr=False,
+            act='relu')
         self.embedding = Embedding(
             [Config.num_classes + 2, Config.word_vector_dim], dtype='float32')
         self.gru_decoder_with_attention = GRUDecoderWithAttention(
-            self.full_name(), Config.decoder_size, Config.num_classes)
+            Config.decoder_size, Config.num_classes)

     def forward(self, inputs, label_in):
         gru_backward, encoded_vector, encoded_proj = self.encoder_net(inputs)

@@ -425,14 +406,15 @@ class TestDygraphOCRAttention(unittest.TestCase):
             fluid.default_main_program().random_seed = seed
             backward_strategy = fluid.dygraph.BackwardStrategy()
             backward_strategy.sort_sum_gradient = True
-            ocr_attention = OCRAttention("ocr_attention")
+            ocr_attention = OCRAttention()

             if Config.learning_rate_decay == "piecewise_decay":
                 learning_rate = fluid.layers.piecewise_decay(
                     [50000], [Config.LR, Config.LR * 0.01])
             else:
                 learning_rate = Config.LR
-            optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            optimizer = fluid.optimizer.SGD(
+                learning_rate=0.001, parameter_list=ocr_attention.parameters())
             dy_param_init_value = {}
             for param in ocr_attention.parameters():
                 dy_param_init_value[param.name] = param.numpy()

@@ -478,7 +460,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
             # print("static start")
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-            ocr_attention = OCRAttention("ocr_attention")
+            ocr_attention = OCRAttention()

             if Config.learning_rate_decay == "piecewise_decay":
                 learning_rate = fluid.layers.piecewise_decay(

@ -23,17 +23,17 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer, Adam from paddle.fluid.optimizer import SGDOptimizer, Adam
from paddle.fluid.dygraph.nn import FC from paddle.fluid.dygraph import Linear
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from test_imperative_base import new_program_scope from test_imperative_base import new_program_scope
class MLP(fluid.Layer): class MLP(fluid.Layer):
def __init__(self, name_scope, param_attr=None, bias_attr=None): def __init__(self, param_attr=None, bias_attr=None):
super(MLP, self).__init__(name_scope) super(MLP, self).__init__()
self._fc1 = FC(self.full_name(), 10) self._fc1 = Linear(784, 10)
self._fc2 = FC(self.full_name(), 10) self._fc2 = Linear(10, 10)
def forward(self, inputs): def forward(self, inputs):
y = self._fc1(inputs) y = self._fc1(inputs)
@ -45,13 +45,16 @@ class TestImperativeOptimizerBase(unittest.TestCase):
def setUp(self): def setUp(self):
self.batch_num = 20 self.batch_num = 20
def get_optimizer_dygraph(self, parameter_list):
raise NotImplementedError()
def get_optimizer(self): def get_optimizer(self):
raise NotImplementedError() raise NotImplementedError()
def reader_decorator(self, reader): def reader_decorator(self, reader):
def _reader_imple(): def _reader_imple():
for item in reader(): for item in reader():
image = np.array(item[0]).reshape(1, 28, 28) image = np.array(item[0]).reshape(1, 784)
label = np.array(item[1]).astype('int64').reshape(1) label = np.array(item[1]).astype('int64').reshape(1)
yield image, label yield image, label
@ -65,8 +68,9 @@ class TestImperativeOptimizerBase(unittest.TestCase):
fluid.default_startup_program().random_seed = seed fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed fluid.default_main_program().random_seed = seed
mlp = MLP('mlp') mlp = MLP()
optimizer = self.get_optimizer() optimizer = self.get_optimizer_dygraph(
parameter_list=mlp.parameters())
batch_py_reader = fluid.io.PyReader(capacity=1) batch_py_reader = fluid.io.PyReader(capacity=1)
batch_py_reader.decorate_sample_list_generator( batch_py_reader.decorate_sample_list_generator(
@ -85,6 +89,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
label = data[1] label = data[1]
label.stop_gradient = True label.stop_gradient = True
img = fluid.layers.reshape(img, shape=[batch_size, -1])
cost = mlp(img) cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost) avg_loss = fluid.layers.reduce_mean(cost)
dy_out = avg_loss.numpy() dy_out = avg_loss.numpy()
@ -107,7 +112,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
exe = fluid.Executor(fluid.CPUPlace( exe = fluid.Executor(fluid.CPUPlace(
) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)) ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
mlp = MLP('mlp') mlp = MLP()
optimizer = self.get_optimizer() optimizer = self.get_optimizer()
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=128, drop_last=True) paddle.dataset.mnist.train(), batch_size=128, drop_last=True)
@ -115,6 +120,7 @@ class TestImperativeOptimizerBase(unittest.TestCase):
img = fluid.layers.data( img = fluid.layers.data(
name='pixel', shape=[1, 28, 28], dtype='float32') name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
img = fluid.layers.reshape(img, shape=[batch_size, -1])
cost = mlp(img) cost = mlp(img)
avg_loss = fluid.layers.reduce_mean(cost) avg_loss = fluid.layers.reduce_mean(cost)
optimizer.minimize(avg_loss) optimizer.minimize(avg_loss)
@ -162,6 +168,15 @@ class TestImperativeOptimizerBase(unittest.TestCase):
class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase): class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
+    def get_optimizer_dygraph(self, parameter_list):
+        bd = [3, 6, 9]
+        optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.piecewise_decay(
+                boundaries=bd,
+                values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
+            parameter_list=parameter_list)
+        return optimizer
+
     def get_optimizer(self):
         bd = [3, 6, 9]
         optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
@@ -173,6 +188,16 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
 class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
+    def get_optimizer_dygraph(self, parameter_list):
+        optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.natural_exp_decay(
+                learning_rate=0.1,
+                decay_steps=10000,
+                decay_rate=0.5,
+                staircase=True),
+            parameter_list=parameter_list)
+        return optimizer
+
     def get_optimizer(self):
         optimizer = SGDOptimizer(learning_rate=fluid.layers.natural_exp_decay(
             learning_rate=0.1,
@@ -186,6 +211,16 @@ class TestImperativeOptimizerNaturalExpDecay(TestImperativeOptimizerBase):
 class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
+    def get_optimizer_dygraph(self, parameter_list):
+        optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.exponential_decay(
+                learning_rate=0.1,
+                decay_steps=10000,
+                decay_rate=0.5,
+                staircase=True),
+            parameter_list=parameter_list)
+        return optimizer
+
     def get_optimizer(self):
         optimizer = SGDOptimizer(learning_rate=fluid.layers.exponential_decay(
             learning_rate=0.1,
@@ -199,6 +234,16 @@ class TestImperativeOptimizerExponentialDecay(TestImperativeOptimizerBase):
 class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
+    def get_optimizer_dygraph(self, parameter_list):
+        optimizer = Adam(
+            learning_rate=fluid.layers.inverse_time_decay(
+                learning_rate=0.1,
+                decay_steps=10000,
+                decay_rate=0.5,
+                staircase=True),
+            parameter_list=parameter_list)
+        return optimizer
+
     def get_optimizer(self):
         optimizer = Adam(learning_rate=fluid.layers.inverse_time_decay(
             learning_rate=0.1,
@@ -212,6 +257,13 @@ class TestImperativeOptimizerInverseTimeDecay(TestImperativeOptimizerBase):
 class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
+    def get_optimizer_dygraph(self, parameter_list):
+        optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.polynomial_decay(
+                learning_rate=0.1, decay_steps=5, cycle=self.cycle),
+            parameter_list=parameter_list)
+        return optimizer
+
     def get_optimizer(self):
         optimizer = SGDOptimizer(learning_rate=fluid.layers.polynomial_decay(
             learning_rate=0.1, decay_steps=5, cycle=self.cycle))
@@ -227,6 +279,13 @@ class TestImperativeOptimizerPolynomialDecay(TestImperativeOptimizerBase):
 class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
+    def get_optimizer_dygraph(self, parameter_list):
+        optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.cosine_decay(
+                learning_rate=0.1, step_each_epoch=10000, epochs=120),
+            parameter_list=parameter_list)
+        return optimizer
+
     def get_optimizer(self):
         optimizer = SGDOptimizer(learning_rate=fluid.layers.cosine_decay(
             learning_rate=0.1, step_each_epoch=10000, epochs=120))
@@ -237,6 +296,13 @@ class TestImperativeOptimizerCosineDecay(TestImperativeOptimizerBase):
 class TestImperativeOptimizerNoamDecay(TestImperativeOptimizerBase):
+    def get_optimizer_dygraph(self, parameter_list):
+        optimizer = SGDOptimizer(
+            learning_rate=fluid.layers.noam_decay(
+                d_model=512, warmup_steps=8000),
+            parameter_list=parameter_list)
+        return optimizer
+
     def get_optimizer(self):
         optimizer = SGDOptimizer(learning_rate=fluid.layers.noam_decay(
             d_model=512, warmup_steps=8000))
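Note: the pattern above is the core of this change. In dygraph mode an optimizer must now be told which parameters it updates via `parameter_list`, which is why each test class gains a `get_optimizer_dygraph` variant. A minimal sketch of the new contract (layer sizes and data here are illustrative, not taken from the tests):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

with fluid.dygraph.guard():
    model = Linear(4, 2)
    # The optimizer now receives the trainable parameters explicitly.
    sgd = fluid.optimizer.SGDOptimizer(
        learning_rate=1e-3, parameter_list=model.parameters())
    x = fluid.dygraph.to_variable(np.random.rand(3, 4).astype('float32'))
    loss = fluid.layers.reduce_mean(model(x))
    loss.backward()
    sgd.minimize(loss)
    model.clear_gradients()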
@@ -38,7 +38,8 @@ class TestImperativePartitialBackward(unittest.TestCase):
         for param in fc2.parameters():
             self.assertIsNone(param._grad_ivar())
-        optimizer = fluid.optimizer.AdamOptimizer()
+        optimizer = fluid.optimizer.AdamOptimizer(parameter_list=(
+            fc1.parameters() + fc2.parameters()))
         _, params_grads = optimizer.minimize(loss)
         self.assertListEqual(
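When a model is split across several layers, the dygraph optimizer takes the concatenation of their parameter lists; `Layer.parameters()` returns a plain Python list, so `+` works. A sketch under the assumption of two small linear layers:

import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

with fluid.dygraph.guard():
    fc1 = Linear(4, 4)
    fc2 = Linear(4, 4)
    # parameters() returns a list, so the two lists concatenate with +.
    optimizer = fluid.optimizer.AdamOptimizer(
        parameter_list=fc1.parameters() + fc2.parameters())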
@@ -30,13 +30,12 @@ from utils import DyGraphProgramDescTracerTestHelper, is_equal_program
 class SimpleLSTMRNN(fluid.Layer):
     def __init__(self,
-                 name_scope,
                  hidden_size,
                  num_steps,
                  num_layers=2,
                  init_scale=0.1,
                  dropout=None):
-        super(SimpleLSTMRNN, self).__init__(name_scope)
+        super(SimpleLSTMRNN, self).__init__()
         self._hidden_size = hidden_size
         self._num_layers = num_layers
         self._init_scale = init_scale
@@ -45,8 +44,9 @@ class SimpleLSTMRNN(fluid.Layer):
         self._num_steps = num_steps
         self.cell_array = []
         self.hidden_array = []
+        self._create_parameter()
 
-    def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
+    def _create_parameter(self):
         self.weight_1_arr = []
         self.weight_2_arr = []
         self.bias_arr = []
@@ -135,7 +135,6 @@ class SimpleLSTMRNN(fluid.Layer):
 class PtbModel(fluid.Layer):
     def __init__(self,
-                 name_scope,
                  hidden_size,
                  vocab_size,
                  num_layers=2,
@@ -143,7 +142,7 @@ class PtbModel(fluid.Layer):
                  init_scale=0.1,
                  is_sparse=False,
                  dropout=None):
-        super(PtbModel, self).__init__(name_scope)
+        super(PtbModel, self).__init__()
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
         self.init_scale = init_scale
@@ -151,7 +150,6 @@ class PtbModel(fluid.Layer):
         self.num_steps = num_steps
         self.dropout = dropout
         self.simple_lstm_rnn = SimpleLSTMRNN(
-            self.full_name(),
             hidden_size,
             num_steps,
             num_layers=num_layers,
@@ -231,7 +229,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
         fluid.default_main_program().random_seed = seed
         # TODO: marsyang1993 Change seed to
         ptb_model = PtbModel(
-            "ptb_model",
             hidden_size=hidden_size,
             vocab_size=vocab_size,
             num_layers=num_layers,
@@ -239,7 +236,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
             init_scale=init_scale,
             is_sparse=is_sparse)
-        sgd = SGDOptimizer(learning_rate=1e-3)
+        sgd = SGDOptimizer(
+            learning_rate=1e-3, parameter_list=ptb_model.parameters())
         dy_param_updated = dict()
         dy_param_init = dict()
         dy_loss = None
@@ -298,7 +296,6 @@ class TestDygraphPtbRnn(unittest.TestCase):
         fluid.default_startup_program().random_seed = seed
         fluid.default_main_program().random_seed = seed
         ptb_model = PtbModel(
-            "ptb_model",
             hidden_size=hidden_size,
             vocab_size=vocab_size,
             num_layers=num_layers,
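Two migrations show up in this file: Layer subclasses no longer take a name_scope argument, and the lazy _build_once hook is replaced by creating parameters eagerly in __init__. A minimal sketch of the new layer skeleton (the layer itself is hypothetical, not from the test):

import paddle.fluid as fluid

class TinyLayer(fluid.dygraph.Layer):
    def __init__(self, hidden_size):
        # No name_scope: the base class is initialized without arguments.
        super(TinyLayer, self).__init__()
        # Parameters are created eagerly, not deferred to a _build_once hook.
        self.weight = self.create_parameter(
            shape=[hidden_size, hidden_size], dtype='float32')

    def forward(self, x):
        return fluid.layers.matmul(x, self.weight)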
@@ -49,7 +49,6 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase):
         backward_strategy.sort_sum_gradient = True
         # TODO: marsyang1993 Change seed to
         ptb_model = PtbModel(
-            "ptb_model",
             hidden_size=hidden_size,
             vocab_size=vocab_size,
             num_layers=num_layers,
@@ -57,7 +56,8 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase):
             init_scale=init_scale,
             is_sparse=is_sparse)
-        sgd = SGDOptimizer(learning_rate=1e-3)
+        sgd = SGDOptimizer(
+            learning_rate=1e-3, parameter_list=ptb_model.parameters())
         dy_param_updated = dict()
         dy_param_init = dict()
         dy_loss = None
@@ -97,7 +97,6 @@ class TestDygraphPtbRnnSortGradient(unittest.TestCase):
         fluid.default_startup_program().random_seed = seed
         fluid.default_main_program().random_seed = seed
         ptb_model = PtbModel(
-            "ptb_model",
             hidden_size=hidden_size,
             vocab_size=vocab_size,
             num_layers=num_layers,
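This file exercises the same changes under sorted gradient summation; for reference, the strategy object seen at the top of the first hunk is used roughly like this (a sketch; `loss` stands in for any dygraph loss variable):

import paddle.fluid as fluid

# Inside a fluid.dygraph.guard() block, after computing a scalar loss:
backward_strategy = fluid.dygraph.BackwardStrategy()
backward_strategy.sort_sum_gradient = True
# Pass the strategy when starting the backward pass on the loss variable.
loss.backward(backward_strategy)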
@@ -86,7 +86,8 @@ class TestImperativeMnist(unittest.TestCase):
             loss_probs = fluid.layers.elementwise_mul(dy_reward, loss_probs)
             loss = fluid.layers.reduce_sum(loss_probs)
-            sgd = SGDOptimizer(learning_rate=1e-3)
+            sgd = SGDOptimizer(
+                learning_rate=1e-3, parameter_list=policy.parameters())
             dy_param_init_value = {}
@@ -21,7 +21,7 @@ import paddle
 import paddle.fluid as fluid
 from paddle.fluid import core
 from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid import Conv2D, Pool2D, BatchNorm, FC
+from paddle.fluid import Conv2D, Pool2D, BatchNorm, Linear
 from paddle.fluid.dygraph.base import to_variable
 from test_imperative_base import new_program_scope
 from utils import DyGraphProgramDescTracerTestHelper, is_equal_program
@@ -44,7 +44,7 @@ train_parameters = {
 }
 
-def optimizer_setting(params):
+def optimizer_setting(params, parameter_list=None):
     ls = params["learning_strategy"]
     if ls["name"] == "piecewise_decay":
         if "total_images" not in params:
@@ -58,14 +58,18 @@ def optimizer_setting(params):
         base_lr = params["lr"]
         lr = []
         lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
-        optimizer = fluid.optimizer.SGD(learning_rate=0.01)
+        if fluid.in_dygraph_mode():
+            optimizer = fluid.optimizer.SGD(learning_rate=0.01,
+                                            parameter_list=parameter_list)
+        else:
+            optimizer = fluid.optimizer.SGD(learning_rate=0.01)
         # TODO(minqiyang): Add learning rate scheduler support to dygraph mode
         #     optimizer = fluid.optimizer.Momentum(
         #         learning_rate=params["lr"],
         #         learning_rate=fluid.layers.piecewise_decay(
         #             boundaries=bd, values=lr),
         #         momentum=0.9,
         #         regularization=fluid.regularizer.L2Decay(1e-4))
 
     return optimizer
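The optimizer_setting change above is the compatibility pattern for code shared between the two modes: only the dygraph branch forwards parameter_list, since static-graph optimizers still discover parameters from the program. A reduced sketch of that pattern (the helper name is ours, not from the test):

import paddle.fluid as fluid

def build_sgd(parameter_list=None):
    # Only dygraph needs the explicit parameter list.
    if fluid.in_dygraph_mode():
        return fluid.optimizer.SGD(learning_rate=0.01,
                                   parameter_list=parameter_list)
    return fluid.optimizer.SGD(learning_rate=0.01)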
@@ -147,8 +151,8 @@ class BottleneckBlock(fluid.Layer):
 class ResNet(fluid.Layer):
-    def __init__(self, name_scope, layers=50, class_dim=102):
-        super(ResNet, self).__init__(name_scope)
+    def __init__(self, layers=50, class_dim=102):
+        super(ResNet, self).__init__()
         self.layers = layers
         supported_layers = [50, 101, 152]
@@ -187,14 +191,17 @@ class ResNet(fluid.Layer):
         self.pool2d_avg = Pool2D(
             pool_size=7, pool_type='avg', global_pooling=True)
 
+        self.pool2d_avg_output = num_filters[-1] * 4 * 1 * 1
+
         import math
         stdv = 1.0 / math.sqrt(2048 * 1.0)
 
-        self.out = FC(self.full_name(),
-                      size=class_dim,
-                      act='softmax',
-                      param_attr=fluid.param_attr.ParamAttr(
-                          initializer=fluid.initializer.Uniform(-stdv, stdv)))
+        self.out = Linear(
+            self.pool2d_avg_output,
+            class_dim,
+            act='softmax',
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)))
 
     def forward(self, inputs):
         y = self.conv(inputs)
@@ -202,6 +209,7 @@ class ResNet(fluid.Layer):
         for bottleneck_block in self.bottleneck_block_list:
             y = bottleneck_block(y)
         y = self.pool2d_avg(y)
+        y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
         y = self.out(y)
         return y
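Unlike the removed FC layer, Linear requires an explicit input dimension and does not flatten its input, which is why the forward pass gains a reshape to [-1, self.pool2d_avg_output] before the final layer. The same pattern in isolation (batch size and feature sizes assumed):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear

with fluid.dygraph.guard():
    # Pooled feature map of shape [N, C, 1, 1], as after global avg pooling.
    feat = fluid.dygraph.to_variable(
        np.random.rand(8, 2048, 1, 1).astype('float32'))
    out = Linear(2048, 102, act='softmax')
    y = fluid.layers.reshape(feat, shape=[-1, 2048])  # rank 4 -> rank 2
    y = out(y)  # shape [8, 102]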
@@ -228,8 +236,9 @@ class TestDygraphResnet(unittest.TestCase):
         fluid.default_startup_program().random_seed = seed
         fluid.default_main_program().random_seed = seed
 
-        resnet = ResNet("resnet")
-        optimizer = optimizer_setting(train_parameters)
+        resnet = ResNet()
+        optimizer = optimizer_setting(
+            train_parameters, parameter_list=resnet.parameters())
         np.random.seed(seed)
         import random
         random.seed = seed
@@ -315,7 +324,7 @@ class TestDygraphResnet(unittest.TestCase):
         exe = fluid.Executor(fluid.CPUPlace(
         ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
 
-        resnet = ResNet("resnet")
+        resnet = ResNet()
         optimizer = optimizer_setting(train_parameters)
         np.random.seed(seed)