fix english doc, unittest, and remove useless alias of 2.0 lr_scheduler (#27686)

* fix doc and unittest of 2.0 lr_scheduler

* fix doc of 2.0 lr_scheduler

* fix unittest

* fix english doc of lr_scheduler

* fix api name of lr scheduler

* fix api name of lr scheduler
Branch: my_2.0rc
Author: Zhou Wei (committed by GitHub)
parent 9215ad96ca
commit e122e16456
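For orientation before the hunks: the user-visible effect of this change is that the 2.0 learning-rate schedulers move from paddle.optimizer.*LR / paddle.optimizer.lr_scheduler to paddle.optimizer.lr.*Decay, and the old fluid-style aliases exported at the paddle top level are dropped. A minimal usage sketch with the renamed API (class and argument names taken from the hunks below):

    import paddle

    # old: paddle.optimizer.lr_scheduler.NoamLR(d_model=0.01, warmup_steps=100)
    scheduler = paddle.optimizer.lr.NoamDecay(d_model=0.01, warmup_steps=100, verbose=True)
    linear = paddle.nn.Linear(10, 10)
    adam = paddle.optimizer.Adam(learning_rate=scheduler,
                                 parameters=linear.parameters())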

@@ -237,13 +237,6 @@ from .framework import save #DEFINE_ALIAS
from .framework import load #DEFINE_ALIAS
from .framework import DataParallel #DEFINE_ALIAS
-from .framework import NoamDecay #DEFINE_ALIAS
-from .framework import PiecewiseDecay #DEFINE_ALIAS
-from .framework import NaturalExpDecay #DEFINE_ALIAS
-from .framework import ExponentialDecay #DEFINE_ALIAS
-from .framework import InverseTimeDecay #DEFINE_ALIAS
-from .framework import PolynomialDecay #DEFINE_ALIAS
-from .framework import CosineDecay #DEFINE_ALIAS
from .framework import set_default_dtype #DEFINE_ALIAS
from .framework import get_default_dtype #DEFINE_ALIAS

@@ -164,7 +164,7 @@ def load_dygraph(model_path, **configs):
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "paddle_dy")
-scheduler = paddle.optimizer.lr_scheduler.NoamLR(
+scheduler = paddle.optimizer.lr.NoamDecay(
d_model=0.01, warmup_steps=100, verbose=True)
adam = paddle.optimizer.Adam(
learning_rate=scheduler,

@@ -855,7 +855,7 @@ class Executor(object):
def _run_parallel(self, program, scope, feed, fetch_list, fetch_var_name,
return_numpy, return_merged):
-from paddle.optimizer.lr_scheduler import _LRScheduler
+from paddle.optimizer.lr import LRScheduler
exe = program._executor
# TODO(zhenghuihuang): quantization uses Graph in CompiledProgram
# instead of program. We will add support for checking Vars in Graph
@@ -901,7 +901,7 @@ class Executor(object):
if hasattr(program._program, 'lr_sheduler'):
lr_sheduler = program._program.lr_sheduler
-assert isinstance(lr_sheduler, _LRScheduler), "must be _LRScheduler"
+assert isinstance(lr_sheduler, LRScheduler), "must be LRScheduler"
lr_value = lr_sheduler()
lr_var = program._program.global_block().vars[lr_sheduler._var_name]
lr_tensor = _as_lodtensor(lr_value, core.CPUPlace(), lr_var.dtype)
@@ -1238,7 +1238,7 @@ class Executor(object):
def _run_program(self, program, feed, fetch_list, feed_var_name,
fetch_var_name, scope, return_numpy, use_program_cache):
-from paddle.optimizer.lr_scheduler import _LRScheduler
+from paddle.optimizer.lr import LRScheduler
if feed is None:
feed = {}
elif isinstance(feed, (list, tuple)):
@@ -1296,7 +1296,7 @@ class Executor(object):
self._feed_data(program, feed, feed_var_name, scope)
if hasattr(program, 'lr_sheduler'):
assert isinstance(program.lr_sheduler,
-_LRScheduler), "must be _LRScheduler"
+LRScheduler), "must be LRScheduler"
lr_sheduler = program.lr_sheduler
lr_value = lr_sheduler()
lr_var = program.global_block().vars[lr_sheduler._var_name]
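The executor hunks above rely on the LRScheduler contract: the scheduler object is callable and returns the current learning-rate value, which the executor then feeds into the program as a CPU tensor. A hypothetical subclass, only to illustrate that contract (attribute and method names follow the paddle 2.0 base class; this is a sketch, not code from this commit):

    import paddle

    class HalvingDecay(paddle.optimizer.lr.LRScheduler):
        # hypothetical scheduler: halve the base rate every `halve_every` epochs
        def __init__(self, learning_rate=0.1, halve_every=10, last_epoch=-1, verbose=False):
            self.halve_every = halve_every
            super(HalvingDecay, self).__init__(learning_rate, last_epoch, verbose)

        def get_lr(self):
            return self.base_lr * (0.5 ** (self.last_epoch // self.halve_every))

    scheduler = HalvingDecay(learning_rate=0.1, halve_every=10)
    print(scheduler())   # calling the scheduler yields the value the executor feeds
    scheduler.step()     # advance one epoch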

@@ -70,15 +70,15 @@ class Optimizer(object):
grad_clip=None,
name=None):
# Because of the loop import, so place it in the function body
-from paddle.optimizer.lr_scheduler import _LRScheduler
+from paddle.optimizer.lr import LRScheduler
self._parameter_list = list(
parameter_list) if parameter_list is not None else None
self._name = name
if framework.in_dygraph_mode():
if not isinstance(learning_rate,
-(float, LearningRateDecay, _LRScheduler)):
+(float, LearningRateDecay, LRScheduler)):
raise TypeError(
-"learning rate should be float or _LRScheduler, got %s here"
+"learning rate should be float or LRScheduler, got %s here"
% type(learning_rate))
if self._parameter_list is None:
raise AttributeError(
@@ -94,9 +94,9 @@ class Optimizer(object):
break
else:
if not isinstance(learning_rate,
-(float, framework.Variable, _LRScheduler)):
+(float, framework.Variable, LRScheduler)):
raise TypeError(
-"learning rate should be float or _LRScheduler, got %s here"
+"learning rate should be float or LRScheduler, got %s here"
% type(learning_rate))
if grad_clip is not None:
@@ -147,13 +147,13 @@ class Optimizer(object):
state_dict = adam.state_dict()
'''
-from paddle.optimizer.lr_scheduler import _LRScheduler
+from paddle.optimizer.lr import LRScheduler
state_dict = {}
for k, v in self._accumulators.items():
for para_name, var_tmp in v.items():
state_dict[var_tmp.name] = var_tmp
# global step if use lr decay
-if isinstance(self._learning_rate, _LRScheduler):
+if isinstance(self._learning_rate, LRScheduler):
state_dict["LR_Scheduler"] = self._learning_rate.state_dict()
return state_dict
if isinstance(self._learning_rate, LearningRateDecay):
@@ -193,7 +193,7 @@ class Optimizer(object):
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "paddle_dy")
-scheduler = paddle.optimizer.lr_scheduler.NoamLR(
+scheduler = paddle.optimizer.lr.NoamDecay(
d_model=0.01, warmup_steps=100, verbose=True)
adam = paddle.optimizer.Adam(
learning_rate=scheduler,
@@ -203,8 +203,8 @@ class Optimizer(object):
para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy")
'''
-from paddle.optimizer.lr_scheduler import _LRScheduler
-if isinstance(self._learning_rate, _LRScheduler):
+from paddle.optimizer.lr import LRScheduler
+if isinstance(self._learning_rate, LRScheduler):
self._learning_rate.set_dict(state_dict["LR_Scheduler"])
if isinstance(self._learning_rate, LearningRateDecay):
@@ -269,8 +269,8 @@ class Optimizer(object):
return self._opti_name_list
def _create_global_learning_rate(self):
-from paddle.optimizer.lr_scheduler import _LRScheduler
-if isinstance(self._learning_rate, _LRScheduler):
+from paddle.optimizer.lr import LRScheduler
+if isinstance(self._learning_rate, LRScheduler):
lr_var = self._global_learning_rate()
# only create global lr_var once
if not isinstance(lr_var, framework.Variable):

@@ -455,8 +455,8 @@ class TestAdamOpV2(unittest.TestCase):
state_dict = adam.state_dict()
adam.set_state_dict(state_dict)
-#learning_rate is _LRScheduler
-learning_rate = paddle.optimizer.CosineAnnealingLR(
+#learning_rate is LRScheduler
+learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
learning_rate=0.1, T_max=10)
adam = paddle.optimizer.Adam(
learning_rate=learning_rate,
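A compact sketch of what that updated test exercises, the renamed class plus the optimizer state round trip (names as in the hunks here):

    import paddle

    scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.1, T_max=10)
    linear = paddle.nn.Linear(10, 10)
    adam = paddle.optimizer.Adam(learning_rate=scheduler,
                                 parameters=linear.parameters())
    state_dict = adam.state_dict()   # contains an "LR_Scheduler" entry when a scheduler is used
    adam.set_state_dict(state_dict)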

@@ -43,14 +43,22 @@ class TestDirectory(unittest.TestCase):
'paddle.distributed.prepare_context', 'paddle.DataParallel',
'paddle.jit', 'paddle.jit.TracedLayer', 'paddle.jit.to_static',
'paddle.jit.ProgramTranslator', 'paddle.jit.TranslatedLayer',
-'paddle.jit.save', 'paddle.jit.load', 'paddle.NoamDecay',
-'paddle.PiecewiseDecay', 'paddle.NaturalExpDecay',
-'paddle.ExponentialDecay', 'paddle.InverseTimeDecay',
-'paddle.PolynomialDecay', 'paddle.CosineDecay',
-'paddle.static.Executor', 'paddle.static.global_scope',
-'paddle.static.scope_guard', 'paddle.static.append_backward',
-'paddle.static.gradients', 'paddle.static.BuildStrategy',
-'paddle.static.CompiledProgram', 'paddle.static.ExecutionStrategy',
+'paddle.jit.save', 'paddle.jit.load',
+'paddle.optimizer.lr.LRScheduler', 'paddle.optimizer.lr.NoamDecay',
+'paddle.optimizer.lr.PiecewiseDecay',
+'paddle.optimizer.lr.NaturalExpDecay',
+'paddle.optimizer.lr.ExponentialDecay',
+'paddle.optimizer.lr.InverseTimeDecay',
+'paddle.optimizer.lr.PolynomialDecay',
+'paddle.optimizer.lr.CosineAnnealingDecay',
+'paddle.optimizer.lr.MultiStepDecay',
+'paddle.optimizer.lr.StepDecay', 'paddle.optimizer.lr.LambdaDecay',
+'paddle.optimizer.lr.ReduceOnPlateau',
+'paddle.optimizer.lr.LinearWarmup', 'paddle.static.Executor',
+'paddle.static.global_scope', 'paddle.static.scope_guard',
+'paddle.static.append_backward', 'paddle.static.gradients',
+'paddle.static.BuildStrategy', 'paddle.static.CompiledProgram',
+'paddle.static.ExecutionStrategy',
'paddle.static.default_main_program',
'paddle.static.default_startup_program', 'paddle.static.Program',
'paddle.static.name_scope', 'paddle.static.program_guard',

@@ -239,7 +239,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
-scheduler = paddle.optimizer.PiecewiseLR(
+scheduler = paddle.optimizer.lr.PiecewiseDecay(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=scheduler, parameters=ptb_model.parameters())
@@ -328,7 +328,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
-scheduler = paddle.optimizer.PiecewiseLR(
+scheduler = paddle.optimizer.lr.PiecewiseDecay(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=scheduler, parameters=ptb_model.parameters())
@@ -436,7 +436,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
-scheduler = paddle.optimizer.PiecewiseLR(
+scheduler = paddle.optimizer.lr.PiecewiseDecay(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=scheduler, parameters=ptb_model.parameters())
@@ -544,7 +544,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
-scheduler = paddle.optimizer.PiecewiseLR(
+scheduler = paddle.optimizer.lr.PiecewiseDecay(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=scheduler, parameters=ptb_model.parameters())
@@ -829,7 +829,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
) else fluid.CUDAPlace(0)
-scheduler = paddle.optimizer.PiecewiseLR(
+scheduler = paddle.optimizer.lr.PiecewiseDecay(
boundaries=bd, values=lr_arr)
adam = Adam(
learning_rate=scheduler,
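The PiecewiseDecay call repeated in these hunks takes one more value than boundary. A small sketch of the semantics (made-up numbers, not taken from the tests):

    import paddle

    bd = [2, 4]                 # boundaries in epochs
    lr_arr = [0.1, 0.05, 0.01]  # len(values) == len(boundaries) + 1
    scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, values=lr_arr)

    linear = paddle.nn.Linear(10, 10)
    adam = paddle.optimizer.Adam(learning_rate=scheduler,
                                 parameters=linear.parameters())
    for epoch in range(6):
        print(epoch, scheduler())  # 0.1, 0.1, 0.05, 0.05, 0.01, 0.01
        scheduler.step()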

@@ -56,22 +56,22 @@ def reduce_lr_on_plateau(decay_rate, threshold, cooldown, patience, m, n, loss,
return var_list[1]
-class TestReduceLROnPlateauDecay(object):
+class TestReduceOnPlateauDecay(object):
def test_ReduceLR(self):
# the decay rate must be less than 1.0
with self.assertRaises(ValueError):
-paddle.optimizer.ReduceLROnPlateau(learning_rate=1.0, factor=2.0)
+paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, factor=2.0)
# the mode must be "min" or "max"
with self.assertRaises(ValueError):
-paddle.optimizer.ReduceLROnPlateau(learning_rate=1.0, mode="test")
+paddle.optimizer.lr.ReduceOnPlateau(learning_rate=1.0, mode="test")
# the threshold_mode must be "rel" or "abs"
with self.assertRaises(ValueError):
-paddle.optimizer.ReduceLROnPlateau(
+paddle.optimizer.lr.ReduceOnPlateau(
learning_rate=1.0, threshold_mode="test")
with self.assertRaises(TypeError):
-paddle.optimizer.ReduceLROnPlateau(learning_rate="test")
+paddle.optimizer.lr.ReduceOnPlateau(learning_rate="test")
with self.assertRaises(TypeError):
-paddle.optimizer.ReduceLROnPlateau(learning_rate=0.5).step("test")
+paddle.optimizer.lr.ReduceOnPlateau(learning_rate=0.5).step("test")
places = [paddle.CPUPlace()]
if core.is_compiled_with_cuda():
@@ -114,7 +114,7 @@ class TestReduceLROnPlateauDecay(object):
[1], 1, 'float32', persistable=True)
paddle.increment(x)
loss = paddle.sin(x)
-scheduler = paddle.optimizer.ReduceLROnPlateau(**kwargs)
+scheduler = paddle.optimizer.lr.ReduceOnPlateau(**kwargs)
adam = paddle.optimizer.Adam(learning_rate=scheduler)
adam.minimize(loss)
lr_var = adam._global_learning_rate()
@@ -158,7 +158,7 @@ class TestReduceLROnPlateauDecay(object):
var_list = [best, current_lr, cooldown_counter, num_bad_epochs]
linear = paddle.nn.Linear(10, 10)
-scheduler = paddle.optimizer.ReduceLROnPlateau(**kwargs)
+scheduler = paddle.optimizer.lr.ReduceOnPlateau(**kwargs)
adam = paddle.optimizer.Adam(
learning_rate=scheduler, parameters=linear.parameters())
@@ -180,7 +180,7 @@ class TestReduceLROnPlateauDecay(object):
loss, var_list)
self.assertEqual(current_lr, expected_lr)
state_dict = adam.state_dict()
-scheduler1 = paddle.optimizer.ReduceLROnPlateau(**kwargs)
+scheduler1 = paddle.optimizer.lr.ReduceOnPlateau(**kwargs)
adam1 = paddle.optimizer.Adam(
learning_rate=scheduler1, parameters=linear.parameters())
adam1.set_state_dict(state_dict)
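Unlike the other schedulers in this file, ReduceOnPlateau is driven by a monitored metric rather than by the epoch counter. A runnable sketch of the dygraph flow these tests cover (hyperparameters invented for the sketch):

    import paddle

    linear = paddle.nn.Linear(10, 10)
    scheduler = paddle.optimizer.lr.ReduceOnPlateau(
        learning_rate=1.0, factor=0.5, patience=2, verbose=True)
    adam = paddle.optimizer.Adam(learning_rate=scheduler,
                                 parameters=linear.parameters())

    for epoch in range(10):
        out = linear(paddle.uniform(shape=[4, 10], min=-0.1, max=0.1))
        loss = paddle.mean(out)
        loss.backward()
        adam.step()
        adam.clear_grad()
        scheduler.step(loss)  # step() takes the monitored metric here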
@@ -420,7 +420,7 @@ class TestLRScheduler(unittest.TestCase):
adam.clear_grad()
current_lr = adam.get_lr()
expected_lr = python_func(epoch, **kwarg)
-if paddle_api.__name__ != "CosineAnnealingLR":
+if paddle_api.__name__ != "CosineAnnealingDecay":
self.assertEqual(current_lr, expected_lr)
scheduler.step()
else:
@@ -429,74 +429,75 @@ class TestLRScheduler(unittest.TestCase):
def test_scheduler(self):
with self.assertRaises(NotImplementedError):
-paddle.optimizer.lr_scheduler._LRScheduler().step()
+paddle.optimizer.lr.LRScheduler().step()
with self.assertRaises(TypeError):
-paddle.optimizer.MultiStepLR(
+paddle.optimizer.lr.MultiStepDecay(
learning_rate="test", milestones=[1, 2, 3])
with self.assertRaises(TypeError):
-paddle.optimizer.MultiStepLR(learning_rate=0.5, milestones='test')
+paddle.optimizer.lr.MultiStepDecay(
+learning_rate=0.5, milestones='test')
with self.assertRaises(ValueError):
-paddle.optimizer.MultiStepLR(
+paddle.optimizer.lr.MultiStepDecay(
learning_rate=0.5, milestones=[3, 2, 1])
with self.assertRaises(ValueError):
-paddle.optimizer.MultiStepLR(
+paddle.optimizer.lr.MultiStepDecay(
learning_rate=0.5, milestones=[1, 2, 3], gamma=2)
-func_api_kwargs = [(noam_lr, paddle.optimizer.NoamLR, {
+func_api_kwargs = [(noam_lr, paddle.optimizer.lr.NoamDecay, {
"d_model": 0.01,
"warmup_steps": 100,
"verbose": False
-}), (piecewise_lr, paddle.optimizer.PiecewiseLR, {
+}), (piecewise_lr, paddle.optimizer.lr.PiecewiseDecay, {
"boundaries": [3, 6, 9, 15, 20],
"values": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
"verbose": False
-}), (natural_exp_lr, paddle.optimizer.NaturalExpLR, {
+}), (natural_exp_lr, paddle.optimizer.lr.NaturalExpDecay, {
"learning_rate": 0.5,
"gamma": 0.1,
"verbose": True
-}), (inverse_time_lr, paddle.optimizer.InverseTimeLR, {
+}), (inverse_time_lr, paddle.optimizer.lr.InverseTimeDecay, {
"learning_rate": 0.5,
"gamma": 0.1,
"verbose": False
-}), (polynomial_lr, paddle.optimizer.PolynomialLR, {
+}), (polynomial_lr, paddle.optimizer.lr.PolynomialDecay, {
"learning_rate": 0.5,
"decay_steps": 20,
"end_lr": 0,
"power": 1.0,
"cycle": False,
"verbose": True
-}), (polynomial_lr, paddle.optimizer.PolynomialLR, {
+}), (polynomial_lr, paddle.optimizer.lr.PolynomialDecay, {
"learning_rate": 0.5,
"decay_steps": 20,
"end_lr": 0,
"power": 1.0,
"cycle": True,
"verbose": False
-}), (linear_warmup_lr, paddle.optimizer.LinearLrWarmup, {
+}), (linear_warmup_lr, paddle.optimizer.lr.LinearWarmup, {
'learning_rate': 0.5,
'warmup_steps': 20,
'start_lr': 0,
'end_lr': 0.5,
"verbose": True
-}), (exponential_lr, paddle.optimizer.ExponentialLR, {
+}), (exponential_lr, paddle.optimizer.lr.ExponentialDecay, {
"learning_rate": 0.5,
"gamma": 0.9,
"verbose": False
-}), (multi_step_lr, paddle.optimizer.MultiStepLR, {
+}), (multi_step_lr, paddle.optimizer.lr.MultiStepDecay, {
"learning_rate": 0.5,
"milestones": [3, 6, 9, 15, 20],
"gamma": 0.8,
"verbose": True
-}), (step_lr, paddle.optimizer.StepLR, {
+}), (step_lr, paddle.optimizer.lr.StepDecay, {
"learning_rate": 0.5,
"step_size": 2,
"gamma": 0.8,
"verbose": False
-}), (lambda_lr, paddle.optimizer.LambdaLR, {
+}), (lambda_lr, paddle.optimizer.lr.LambdaDecay, {
"learning_rate": 0.5,
"lr_lambda": lambda x: 0.95**x,
"verbose": True
-}), (cosine_annealing_lr, paddle.optimizer.CosineAnnealingLR, {
+}), (cosine_annealing_lr, paddle.optimizer.lr.CosineAnnealingDecay, {
"learning_rate": 0.5,
"T_max": 10,
"verbose": False

@@ -24,11 +24,6 @@ __all__ += [
'DataParallel'
]
-__all__ += [
-'NoamDecay', 'PiecewiseDecay', 'NaturalExpDecay', 'ExponentialDecay',
-'InverseTimeDecay', 'PolynomialDecay', 'CosineDecay'
-]
from . import random
from .random import manual_seed
from .framework import get_default_dtype
@@ -51,11 +46,3 @@ from ..fluid.dygraph.base import grad #DEFINE_ALIAS
from .io import save
from .io import load
from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS
-from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS
-from ..fluid.dygraph.learning_rate_scheduler import PiecewiseDecay #DEFINE_ALIAS
-from ..fluid.dygraph.learning_rate_scheduler import NaturalExpDecay #DEFINE_ALIAS
-from ..fluid.dygraph.learning_rate_scheduler import ExponentialDecay #DEFINE_ALIAS
-from ..fluid.dygraph.learning_rate_scheduler import InverseTimeDecay #DEFINE_ALIAS
-from ..fluid.dygraph.learning_rate_scheduler import PolynomialDecay #DEFINE_ALIAS
-from ..fluid.dygraph.learning_rate_scheduler import CosineDecay #DEFINE_ALIAS

@@ -228,7 +228,7 @@ def save(obj, path):
layer_state_dict = emb.state_dict()
paddle.save(layer_state_dict, "emb.pdparams")
-scheduler = paddle.optimizer.lr_scheduler.NoamLR(
+scheduler = paddle.optimizer.lr.NoamDecay(
d_model=0.01, warmup_steps=100, verbose=True)
adam = paddle.optimizer.Adam(
learning_rate=scheduler,
@@ -320,7 +320,7 @@ def load(path, **configs):
layer_state_dict = emb.state_dict()
paddle.save(layer_state_dict, "emb.pdparams")
-scheduler = paddle.optimizer.lr_scheduler.NoamLR(
+scheduler = paddle.optimizer.lr.NoamDecay(
d_model=0.01, warmup_steps=100, verbose=True)
adam = paddle.optimizer.Adam(
learning_rate=scheduler,

@@ -121,13 +121,6 @@ from .layer.conv import ConvTranspose3d #DEFINE_ALIAS
# from .layer.conv import TreeConv #DEFINE_ALIAS
# from .layer.conv import Conv1D #DEFINE_ALIAS
from .layer.extension import RowConv #DEFINE_ALIAS
-# from .layer.learning_rate import CosineDecay #DEFINE_ALIAS
-# from .layer.learning_rate import ExponentialDecay #DEFINE_ALIAS
-# from .layer.learning_rate import InverseTimeDecay #DEFINE_ALIAS
-# from .layer.learning_rate import NaturalExpDecay #DEFINE_ALIAS
-# from .layer.learning_rate import NoamDecay #DEFINE_ALIAS
-# from .layer.learning_rate import PiecewiseDecay #DEFINE_ALIAS
-# from .layer.learning_rate import PolynomialDecay #DEFINE_ALIAS
from .layer.common import Linear
# from .layer.loss import NCELoss #DEFINE_ALIAS
from .layer.loss import BCEWithLogitsLoss #DEFINE_ALIAS

@@ -95,14 +95,6 @@ from .extension import target_assign #DEFINE_ALIAS
from .extension import temporal_shift #DEFINE_ALIAS
from .extension import warpctc #DEFINE_ALIAS
from .extension import diag_embed #DEFINE_ALIAS
-from .learning_rate import cosine_decay #DEFINE_ALIAS
-from .learning_rate import exponential_decay #DEFINE_ALIAS
-from .learning_rate import inverse_time_decay #DEFINE_ALIAS
-from .learning_rate import natural_exp_decay #DEFINE_ALIAS
-from .learning_rate import noam_decay #DEFINE_ALIAS
-from .learning_rate import piecewise_decay #DEFINE_ALIAS
-from .learning_rate import polynomial_decay #DEFINE_ALIAS
-from .learning_rate import linear_lr_warmup #DEFINE_ALIAS
# from .lod import sequence_concat #DEFINE_ALIAS
# from .lod import sequence_conv #DEFINE_ALIAS
# from .lod import sequence_enumerate #DEFINE_ALIAS

@@ -1,29 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# TODO: define learning rate decay
-from ...fluid.layers import cosine_decay #DEFINE_ALIAS
-from ...fluid.layers import exponential_decay #DEFINE_ALIAS
-from ...fluid.layers import inverse_time_decay #DEFINE_ALIAS
-from ...fluid.layers import natural_exp_decay #DEFINE_ALIAS
-from ...fluid.layers import noam_decay #DEFINE_ALIAS
-from ...fluid.layers import piecewise_decay #DEFINE_ALIAS
-from ...fluid.layers import polynomial_decay #DEFINE_ALIAS
-from ...fluid.layers import linear_lr_warmup #DEFINE_ALIAS
-__all__ = [
-'cosine_decay', 'exponential_decay', 'inverse_time_decay',
-'natural_exp_decay', 'noam_decay', 'piecewise_decay', 'polynomial_decay',
-'linear_lr_warmup'
-]

@@ -86,13 +86,6 @@ from .conv import ConvTranspose3d #DEFINE_ALIAS
# from .conv import TreeConv #DEFINE_ALIAS
# from .conv import Conv1D #DEFINE_ALIAS
from .extension import RowConv #DEFINE_ALIAS
-# from .learning_rate import CosineDecay #DEFINE_ALIAS
-# from .learning_rate import ExponentialDecay #DEFINE_ALIAS
-# from .learning_rate import InverseTimeDecay #DEFINE_ALIAS
-# from .learning_rate import NaturalExpDecay #DEFINE_ALIAS
-# from .learning_rate import NoamDecay #DEFINE_ALIAS
-# from .learning_rate import PiecewiseDecay #DEFINE_ALIAS
-# from .learning_rate import PolynomialDecay #DEFINE_ALIAS
# from .loss import NCELoss #DEFINE_ALIAS
from .loss import BCEWithLogitsLoss #DEFINE_ALIAS
from .loss import CrossEntropyLoss #DEFINE_ALIAS

@@ -1,25 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# TODO: define learning rate decay
-__all__ = [
-# 'CosineDecay',
-# 'ExponentialDecay',
-# 'InverseTimeDecay',
-# 'NaturalExpDecay',
-# 'NoamDecay',
-# 'PiecewiseDecay',
-# 'PolynomialDecay'
-]

@@ -16,10 +16,7 @@ __all__ = [
'Adadelta', 'AdadeltaOptimizer', 'Adagrad', 'AdagradOptimizer', 'Adam',
'Adamax', 'AdamW', 'DecayedAdagrad', 'DecayedAdagradOptimizer', 'Dpsgd',
'DpsgdOptimizer', 'Ftrl', 'FtrlOptimizer', 'Momentum', 'MomentumOptimizer',
-'RMSProp', 'SGD', 'SGDOptimizer', 'Optimizer', '_LRScheduler', 'NoamLR',
-'PiecewiseLR', 'NaturalExpLR', 'InverseTimeLR', 'PolynomialLR',
-'LinearLrWarmup', 'ExponentialLR', 'MultiStepLR', 'StepLR', 'LambdaLR',
-'ReduceLROnPlateau', 'CosineAnnealingLR'
+'RMSProp', 'SGD', 'SGDOptimizer', 'Optimizer'
]
@@ -36,6 +33,4 @@ from .adadelta import Adadelta
from .sgd import SGD
from .momentum import Momentum
-from . import lr_scheduler
-from .lr_scheduler import _LRScheduler, NoamLR, PiecewiseLR, NaturalExpLR, InverseTimeLR, PolynomialLR, \
-LinearLrWarmup, ExponentialLR, MultiStepLR, StepLR, LambdaLR, ReduceLROnPlateau, CosineAnnealingLR
+from . import lr
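After this hunk the scheduler classes are no longer re-exported from paddle.optimizer itself; they are reached through the lr submodule. A short sketch of the two import styles that remain valid (the concrete class is illustrative):

    import paddle

    scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=30, gamma=0.1)

    # or import directly from the new module
    from paddle.optimizer.lr import LRScheduler, StepDecay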

@@ -48,8 +48,8 @@ class Adam(Optimizer):
Related paper: `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_
Args:
-learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``.
-It can be a float value or a _LRScheduler. The default value is 0.001.
+learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
+It can be a float value or a LRScheduler. The default value is 0.001.
beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates.
It should be a float number or a Tensor with shape [1] and data type as float32.
The default value is 0.9.

@@ -47,8 +47,8 @@ class Adamax(Optimizer):
it is added here for numerical stability to prevent the division by 0 error.
Args:
-learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``.
-It can be a float value or a _LRScheduler. The default value is 0.001.
+learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
+It can be a float value or a LRScheduler. The default value is 0.001.
beta1 (float, optional): The exponential decay rate for the 1st moment estimates.
The default value is 0.9.
beta2 (float, optional): The exponential decay rate for the 2nd moment estimates.

@@ -42,8 +42,8 @@ class AdamW(Adam):
Args:
-learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``.
-It can be a float value or a _LRScheduler. The default value is 0.001.
+learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
+It can be a float value or a LRScheduler. The default value is 0.001.
parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.

@@ -41,7 +41,7 @@ from paddle.fluid.layers import tensor
from functools import reduce
from ..fluid.wrapped_decorator import signature_safe_contextmanager
from .. import compat as cpt
-from .lr_scheduler import _LRScheduler
+from .lr import LRScheduler
__all__ = ['Optimizer']
@@ -54,8 +54,8 @@ class Optimizer(object):
but need to use one of it's implementation.
Args:
-learning_rate (float|_LRScheduler): The learning rate used to update ``Parameter``.
-It can be a float value or any subclass of ``_LRScheduler`` .
+learning_rate (float|LRScheduler): The learning rate used to update ``Parameter``.
+It can be a float value or any subclass of ``LRScheduler`` .
parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
@@ -82,12 +82,8 @@ class Optimizer(object):
#Take the subclass adam as an example
import paddle
-import numpy as np
-paddle.disable_static()
-inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
-inp = paddle.to_tensor(inp)
+inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
out = linear(inp)
loss = paddle.mean(out)
adam = paddle.optimizer.Adam(learning_rate=0.1,
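The docstring example now builds its random input with the 2.0 tensor API instead of round-tripping through numpy; the replacement call is simply:

    import paddle
    inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)  # replaces np.random.uniform(...) + paddle.to_tensor(...)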
@@ -121,9 +117,9 @@ class Optimizer(object):
"The weight_decay[%s] in Optimizer will not take effect, and it will only be applied to other Parameters!"
% weight_decay.__str__())
break
-if not isinstance(learning_rate, (float, _LRScheduler)):
+if not isinstance(learning_rate, (float, LRScheduler)):
raise TypeError(
-"learning rate should be float or _LRScheduler, got %s here" %
+"learning rate should be float or LRScheduler, got %s here" %
type(learning_rate))
if grad_clip is not None:
if not isinstance(grad_clip, GradientClipBase):
@@ -156,7 +152,7 @@ class Optimizer(object):
@framework.dygraph_only
def state_dict(self):
'''
-Get state dict information from optimizer. It contain all the tensor used by optimizer. For Adam optimizer, contains beta1, beta2, momentum etc. If _LRScheduler have been used, global_step will be include in state dict.
+Get state dict information from optimizer. It contain all the tensor used by optimizer. For Adam optimizer, contains beta1, beta2, momentum etc. If LRScheduler have been used, global_step will be include in state dict.
If the optimizer never be called(minimize function), the state_dict is empty.
Args:
@@ -169,7 +165,6 @@ class Optimizer(object):
.. code-block:: python
import paddle
-paddle.disable_static()
emb = paddle.nn.Embedding(10, 10)
adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters())
@@ -181,14 +176,14 @@ class Optimizer(object):
for para_name, var_tmp in v.items():
state_dict[var_tmp.name] = var_tmp
# global step if use lr decay
-if isinstance(self._learning_rate, _LRScheduler):
+if isinstance(self._learning_rate, LRScheduler):
state_dict["LR_Scheduler"] = self._learning_rate.state_dict()
return state_dict
@framework.dygraph_only
def set_state_dict(self, state_dict):
'''
-Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If _LRScheduler have been used, global_step will be changed.
+Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LRScheduler have been used, global_step will be changed.
Args:
state_dict(dict) : Dict contains all the Tensor needed by optimizer
@@ -199,26 +194,28 @@ class Optimizer(object):
.. code-block:: python
import paddle
-paddle.disable_static()
-emb = paddle.nn.Embedding(10, 10)
-state_dict = emb.state_dict()
-paddle.framework.save(state_dict, "paddle_dy")
-adam = paddle.optimizer.Adam(learning_rate=paddle.optimizer.NoamLR( 100, 10000),
-parameters=emb.parameters())
-state_dict = adam.state_dict()
-paddle.framework.save(state_dict, "paddle_dy")
-para_state_dict, opti_state_dict = paddle.framework.load( "paddle_dy")
+emb = paddle.nn.Embedding(10, 10)
+layer_state_dict = emb.state_dict()
+paddle.save(layer_state_dict, "emb.pdparams")
+scheduler = paddle.optimizer.lr.NoamDecay(
+d_model=0.01, warmup_steps=100, verbose=True)
+adam = paddle.optimizer.Adam(
+learning_rate=scheduler,
+parameters=emb.parameters())
+opt_state_dict = adam.state_dict()
+paddle.save(opt_state_dict, "adam.pdopt")
+opti_state_dict = paddle.load("adam.pdopt")
adam.set_state_dict(opti_state_dict)
'''
-if isinstance(self._learning_rate, _LRScheduler):
+if isinstance(self._learning_rate, LRScheduler):
self._learning_rate.set_dict(state_dict["LR_Scheduler"])
-if isinstance(self._learning_rate, _LRScheduler):
+if isinstance(self._learning_rate, LRScheduler):
self._learning_rate.set_state_dict(state_dict["LR_Scheduler"])
self._accumulators_holder = state_dict
@@ -256,7 +253,7 @@ class Optimizer(object):
return self._opti_name_list
def _create_global_learning_rate(self):
-if isinstance(self._learning_rate, _LRScheduler):
+if isinstance(self._learning_rate, LRScheduler):
lr_var = self._global_learning_rate()
# only create global lr_var once
if not isinstance(lr_var, framework.Variable):
@@ -299,7 +296,7 @@ class Optimizer(object):
"""
:api_attr: imperative
-Set the value of the learning rate manually in the optimizer. If the optimizer use _LRScheduler,
+Set the value of the learning rate manually in the optimizer. If the optimizer use LRScheduler,
this API cannot be invoked, because it will lead to conflict.
Args:
@@ -312,7 +309,6 @@ class Optimizer(object):
.. code-block:: python
import paddle
-paddle.disable_static()
linear = paddle.nn.Linear(10, 10)
adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())
@@ -335,9 +331,9 @@ class Optimizer(object):
raise TypeError(
"The type of 'value' in optimizer.set_lr must be float, but received %s."
% (type(value)))
-if isinstance(self._learning_rate, _LRScheduler):
+if isinstance(self._learning_rate, LRScheduler):
raise RuntimeError(
-"optimizer's learning rate can't be _LRScheduler when invoke this API, because this will lead to conflict."
+"optimizer's learning rate can't be LRScheduler when invoke this API, because this will lead to conflict."
)
self._learning_rate = float(value)
current_lr = self._global_learning_rate()
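A small sketch of the constraint enforced in that set_lr hunk: a float learning rate can be overridden manually, while an attached scheduler makes set_lr raise (the concrete scheduler below is just an example):

    import paddle

    linear = paddle.nn.Linear(10, 10)
    adam = paddle.optimizer.Adam(0.1, parameters=linear.parameters())
    adam.set_lr(0.05)        # fine: the optimizer holds a plain float
    print(adam.get_lr())     # 0.05

    sched = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=2)
    adam2 = paddle.optimizer.Adam(sched, parameters=linear.parameters())
    # adam2.set_lr(0.05)     # would raise RuntimeError, per the check above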
@@ -358,7 +354,7 @@ class Optimizer(object):
"""
:api_attr: imperative
-Get current step learning rate. The return value is all the same When _LRScheduler is not used,
+Get current step learning rate. The return value is all the same When LRScheduler is not used,
otherwise return the current step learning rate.
@@ -370,15 +366,13 @@ class Optimizer(object):
import numpy as np
import paddle
-# example1: _LRScheduler is not used, return value is all the same
-paddle.disable_static()
+# example1: LRScheduler is not used, return value is all the same
emb = paddle.nn.Embedding(10, 10)
adam = paddle.optimizer.Adam(0.001, parameters = emb.parameters())
lr = adam.get_lr()
print(lr) # 0.001
-# example2: PiecewiseLR is used, return the step learning rate
-paddle.disable_static()
+# example2: PiecewiseDecay is used, return the scheduled learning rate
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
@@ -387,7 +381,7 @@ class Optimizer(object):
bd = [2, 4, 6, 8]
value = [0.2, 0.4, 0.6, 0.8, 1.0]
-scheduler = paddle.optimizer.PiecewiseLR(bd, value, 0)
+scheduler = paddle.optimizer.lr.PiecewiseDecay(bd, value, 0)
adam = paddle.optimizer.Adam(scheduler,
parameters=linear.parameters())
@@ -656,7 +650,6 @@ class Optimizer(object):
import paddle
import numpy as np
-paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
@@ -727,7 +720,6 @@ class Optimizer(object):
import paddle
import numpy as np
-paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
@@ -805,7 +797,7 @@ class Optimizer(object):
import numpy as np
import paddle
-paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
@@ -854,13 +846,9 @@ class Optimizer(object):
.. code-block:: python
import paddle
-import numpy as np
-paddle.disable_static()
-inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
-inp = paddle.to_tensor(inp)
-out = linear(inp)
+input = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
+out = linear(input)
loss = paddle.mean(out)
beta1 = paddle.to_tensor([0.9], dtype="float32")
@@ -903,7 +891,7 @@ class Optimizer(object):
import paddle
import numpy as np
-paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)

@@ -69,8 +69,8 @@ class RMSProp(Optimizer):
Parameters:
-learning_rate (float|_LRScheduler): The learning rate used to update ``Parameter``.
-It can be a float value or a _LRScheduler.
+learning_rate (float|LRScheduler): The learning rate used to update ``Parameter``.
+It can be a float value or a LRScheduler.
rho(float): rho is :math: `\\rho` in equation, default is 0.95.
epsilon(float): :math: `\\epsilon` in equation is smoothing term to
avoid division by zero, default is 1e-6.
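Matching the updated RMSProp docstring, learning_rate now accepts either a float or any LRScheduler instance; a brief sketch (the scheduler choice is illustrative):

    import paddle

    linear = paddle.nn.Linear(10, 10)
    scheduler = paddle.optimizer.lr.InverseTimeDecay(learning_rate=0.5, gamma=0.1)
    rmsprop = paddle.optimizer.RMSProp(learning_rate=scheduler,
                                       parameters=linear.parameters())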
