[2.0API] support 2.0 lr_scheduler for 2.0 optimizer (#26737)

* support 2.0 lr_scheduler for 2.0 optimizer

* fix unittest

* fix doc

* fix unittest

* fix sample code, fix unittest
Zhou Wei committed via GitHub
parent 29494d703d
commit 30aab17734
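
In short, the change swaps the fluid 1.x decay helpers (fluid.dygraph.*Decay, fluid.layers.piecewise_decay) for 2.0 _LRScheduler objects that are passed directly as the optimizer's learning_rate and stepped explicitly. Below is a minimal sketch of that pattern, reusing the scheduler and optimizer names from the hunks in this commit; the toy linear layer, random input, and loop length are illustrative assumptions, not part of the change:

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # toy layer and data, for illustration only
        linear = paddle.nn.Linear(10, 10)

        # 2.0 scheduler object; replaces fluid.layers.piecewise_decay /
        # fluid.dygraph.PiecewiseDecay in the tests below
        bd = [3, 6, 9]
        scheduler = paddle.optimizer.PiecewiseLR(
            boundaries=bd,
            values=[0.1 * (0.1**i) for i in range(len(bd) + 1)])

        # the scheduler is passed straight in as the 2.0 optimizer's learning_rate
        adam = paddle.optimizer.Adam(
            learning_rate=scheduler, parameters=linear.parameters())

        for i in range(12):
            x = fluid.dygraph.to_variable(
                np.random.rand(4, 10).astype('float32'))
            loss = fluid.layers.reduce_mean(linear(x))
            loss.backward()
            adam.minimize(loss)
            scheduler.step()             # the scheduler is stepped explicitly
            linear.clear_gradients()
            print(i, adam.get_lr())      # current lr now comes from the scheduler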

@@ -456,8 +456,9 @@ class TestAdamOpV2(unittest.TestCase):
state_dict = adam.state_dict()
adam.set_state_dict(state_dict)
#learning_rate is Decay
learning_rate = fluid.dygraph.CosineDecay(0.1, 10000, 120)
#learning_rate is _LRScheduler
learning_rate = paddle.optimizer.CosineAnnealingLR(
learning_rate=0.1, T_max=10)
adam = paddle.optimizer.Adam(
learning_rate=learning_rate,
weight_decay=fluid.regularizer.L2Decay(0.001),
@@ -498,15 +499,10 @@ class TestAdamOpV2(unittest.TestCase):
adam.set_lr(lr)
cur_lr = adam.get_lr()
assert (lr == cur_lr)
lr_var = paddle.create_global_var(shape=[1], value=lr, dtype='float32')
adam.set_lr(lr_var)
cur_lr = adam.get_lr()
assert (np.float32(lr) == cur_lr)
with self.assertRaises(TypeError):
lr = int(1)
adam.set_lr(lr)
lr_var = paddle.create_global_var(
shape=[1], value=lr, dtype='float32')
adam.set_lr(lr_var)
if __name__ == "__main__":
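
The hunk above narrows set_lr on the 2.0 optimizer to plain Python floats; the old path that accepted a Variable is removed and now sits under the TypeError expectation. A self-contained sketch of that behavior, mirroring the updated test (the toy linear layer is a placeholder):

    import paddle
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        linear = paddle.nn.Linear(10, 10)
        adam = paddle.optimizer.Adam(
            learning_rate=0.1, parameters=linear.parameters())

        adam.set_lr(0.02)                # a Python float is accepted
        assert adam.get_lr() == 0.02

        # per the updated test, non-float values are rejected with TypeError
        try:
            adam.set_lr(int(1))
        except TypeError as e:
            print("int rejected:", e)

        try:
            lr_var = paddle.create_global_var(
                shape=[1], value=0.05, dtype='float32')
            adam.set_lr(lr_var)
        except TypeError as e:
            print("Tensor rejected:", e)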

@@ -200,7 +200,7 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list):
bd = [3, 6, 9]
optimizer = SGDOptimizer(
learning_rate=fluid.layers.piecewise_decay(
learning_rate=paddle.optimizer.PiecewiseLR(
boundaries=bd,
values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]),
parameter_list=parameter_list)
@@ -208,7 +208,7 @@ class TestImperativeOptimizerPiecewiseDecay(TestImperativeOptimizerBase):
def get_optimizer(self):
bd = [3, 6, 9]
optimizer = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
optimizer = SGDOptimizer(learning_rate=paddle.optimizer.PiecewiseLR(
boundaries=bd, values=[0.1 * (0.1**i) for i in range(len(bd) + 1)]))
return optimizer
@@ -381,9 +381,9 @@ class TestOptimizerLearningRate(unittest.TestCase):
bd = [2, 4, 6, 8]
value = [0.2, 0.4, 0.6, 0.8, 1.0]
scheduler = paddle.optimizer.PiecewiseLR(bd, value)
adam = paddle.optimizer.Adam(
fluid.dygraph.PiecewiseDecay(bd, value, 0),
parameters=linear.parameters())
scheduler, parameters=linear.parameters())
self.assertTrue(
np.allclose(
@@ -393,8 +393,8 @@ class TestOptimizerLearningRate(unittest.TestCase):
for i in range(12):
adam.minimize(loss)
lr = adam.get_lr()
self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
scheduler.step()
def test_lr_decay_natural_exp(self):
with fluid.dygraph.guard():
@@ -409,24 +409,21 @@ class TestOptimizerLearningRate(unittest.TestCase):
loss = fluid.layers.reduce_mean(b)
base_lr = 1.0
scheduler = paddle.optimizer.NaturalExpLR(1.0, gamma=0.5)
print("scheduler.last_lr", scheduler.last_lr)
adam = paddle.optimizer.Adam(
fluid.dygraph.NaturalExpDecay(
learning_rate=base_lr,
decay_steps=3,
decay_rate=0.5,
staircase=True),
parameters=linear.parameters())
scheduler, parameters=linear.parameters())
self.assertTrue(
np.allclose(
adam.get_lr(), 1.0, rtol=1e-06, atol=0.0))
ret = [1.0, 1.0, 1.0, np.exp(-0.5), np.exp(-0.5)]
for i in range(5):
ret = [1.0, np.exp(-0.5), np.exp(-1)]
for i in range(3):
adam.minimize(loss)
lr = adam.get_lr()
self.assertTrue(np.allclose(lr, ret[i], rtol=1e-06, atol=0.0))
scheduler.step()
def test_set_lr(self):
with fluid.dygraph.guard():
@@ -451,20 +448,15 @@ class TestOptimizerLearningRate(unittest.TestCase):
np.allclose(
lr, lr_list[i], rtol=1e-06, atol=0.0))
with self.assertRaises(TypeError):
lr_var = fluid.layers.create_global_var(
shape=[1], value=0.7, dtype='float32')
adam.set_lr(lr_var)
adam.minimize(loss)
lr = adam.get_lr()
self.assertTrue(np.allclose(lr, 0.7, rtol=1e-06, atol=0.0))
with self.assertRaises(RuntimeError):
adam = paddle.optimizer.Adam(
fluid.dygraph.NaturalExpDecay(
learning_rate=0.1,
decay_steps=3,
decay_rate=0.5,
staircase=True),
paddle.optimizer.NaturalExpLR(
learning_rate=0.1, gamma=0.5),
parameters=linear.parameters())
adam.set_lr(0.01)
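
The replacement above also covers the guard that set_lr cannot override a scheduler: once the optimizer's learning_rate is an _LRScheduler, calling set_lr raises RuntimeError. A small self-contained sketch of that case (toy layer assumed):

    import paddle
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        linear = paddle.nn.Linear(10, 10)
        scheduler = paddle.optimizer.NaturalExpLR(learning_rate=0.1, gamma=0.5)
        adam = paddle.optimizer.Adam(
            learning_rate=scheduler, parameters=linear.parameters())

        # per the updated test: set_lr may not be mixed with a scheduler
        try:
            adam.set_lr(0.01)
        except RuntimeError as e:
            print("set_lr rejected:", e)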

@@ -374,6 +374,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
adam._learning_rate.step_num = 0
para_state_dict, opti_state_dict = paddle.load("./test_dy")
print(opti_state_dict['LR_Scheduler'])
adam.set_dict(opti_state_dict)
opti_dict = adam.state_dict()

@@ -239,10 +239,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
scheduler = paddle.optimizer.PiecewiseLR(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
parameters=ptb_model.parameters())
learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -268,7 +268,9 @@ class TestDygraphPtbRnn(unittest.TestCase):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
scheduler.step()
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
dy_param_updated[param.name] = param.numpy()
@@ -283,7 +285,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
else:
self.base_opti[k] = v
fluid.save_dygraph(self.opti_dict, "./test_dy")
fluid.save_dygraph(self.opti_dict, "./test_dy_v2")
self.state_dict = ptb_model.state_dict()
@@ -292,7 +294,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
np_t = v.numpy()
self.model_base[k] = np_t
paddle.save(self.state_dict, "./test_dy")
paddle.save(self.state_dict, "./test_dy_v2")
def testLoadAndSetVarBase(self):
seed = 90
@@ -325,10 +327,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
scheduler = paddle.optimizer.PiecewiseLR(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
parameters=ptb_model.parameters())
learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -354,6 +356,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
scheduler.step()
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
@@ -370,10 +373,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
if isinstance(adam._learning_rate, LearningRateDecay):
adam._learning_rate.step_num = 0
para_state_dict, opti_state_dict = paddle.load("./test_dy")
para_state_dict, opti_state_dict = paddle.load("./test_dy_v2")
adam.set_state_dict(opti_state_dict)
opti_dict = adam.state_dict()
@@ -434,10 +434,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
scheduler = paddle.optimizer.PiecewiseLR(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
parameters=ptb_model.parameters())
learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -463,6 +463,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
scheduler.step()
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
@@ -541,10 +542,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
scheduler = paddle.optimizer.PiecewiseLR(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
parameters=ptb_model.parameters())
learning_rate=scheduler, parameters=ptb_model.parameters())
dy_param_updated = dict()
dy_param_init = dict()
dy_loss = None
@@ -570,6 +571,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
dy_param_init[param.name] = param.numpy()
dy_loss.backward()
adam.minimize(dy_loss)
scheduler.step()
ptb_model.clear_gradients()
if i == batch_num - 1:
for param in ptb_model.parameters():
@@ -745,7 +747,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
last_hidden = None
last_cell = None
state_dict, opti_dict = fluid.load_dygraph("./test_dy")
state_dict, opti_dict = fluid.load_dygraph("./test_dy_v2")
adam.set_state_dict(opti_dict)
ptb_model.set_dict(state_dict)
@@ -825,9 +827,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
scheduler = paddle.optimizer.PiecewiseLR(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr_arr),
learning_rate=scheduler,
beta1=0.8,
beta2=0.6,
parameters=ptb_model.parameters())
@@ -867,14 +870,16 @@ class TestDygraphPtbRnn(unittest.TestCase):
init_cell)
dy_loss.backward()
scheduler.step()
adam.minimize(dy_loss)
ptb_model.clear_gradients()
opti_dict = adam.state_dict()
for k, v in opti_dict.items():
if k == "global_step":
if k == "LR_Scheduler":
self.assertTrue(
np.array_equal(v.numpy(), self.base_opti[v.name] + 1))
np.array_equal(v['last_epoch'], self.base_opti[k][
'last_epoch'] + 1))
if k.find("beta1_pow_acc_0") > 0:
self.assertTrue(
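
The last hunks check that the scheduler state now travels inside the optimizer state_dict under the 'LR_Scheduler' key and survives save/load. A sketch of that round trip, following the calls and the "./test_dy_v2" prefix from the diff; a toy linear layer stands in for the PTB model used by the tests:

    import paddle
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        linear = paddle.nn.Linear(10, 10)
        scheduler = paddle.optimizer.PiecewiseLR(
            boundaries=[3, 6, 9], values=[1.0, 0.5, 0.1, 0.05])
        adam = paddle.optimizer.Adam(
            learning_rate=scheduler, parameters=linear.parameters())

        # the scheduler state (e.g. last_epoch) is carried in the optimizer state_dict
        opti_dict = adam.state_dict()
        print(opti_dict['LR_Scheduler'])

        # save parameters and optimizer state under one prefix, as the test does
        paddle.save(linear.state_dict(), "./test_dy_v2")
        fluid.save_dygraph(opti_dict, "./test_dy_v2")

        # reload both; set_state_dict restores the scheduler's last_epoch as well
        para_state_dict, opti_state_dict = paddle.load("./test_dy_v2")
        linear.set_dict(para_state_dict)
        adam.set_state_dict(opti_state_dict)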

(Two additional file diffs in this commit are suppressed because they are too large.)