@@ -325,9 +325,10 @@ class AdamWeightDecayDynamicLR(Optimizer):
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
             should be class mindspore.Parameter.
         decay_steps (int): The steps of the decay.
+        warmup_steps (int): The steps of lr warm up. Default: 0.
         learning_rate (float): A floating point value for the learning rate. Default: 0.001.
         end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001.
-        power (float): Power. Default: 10.0.
+        power (float): The power of the polynomial. Default: 10.0.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9.
             Should be in range (0.0, 1.0).
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999.
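
For context, a minimal usage sketch of the arguments documented above, assuming a MindSpore build that still exposes mindspore.nn.AdamWeightDecayDynamicLR; Net is a hypothetical stand-in nn.Cell and the numeric values are placeholders rather than recommendations.

import mindspore.nn as nn

class Net(nn.Cell):
    """Minimal stand-in network so the sketch is self-contained."""
    def __init__(self):
        super(Net, self).__init__()
        self.dense = nn.Dense(10, 2)

    def construct(self, x):
        return self.dense(x)

net = Net()
optimizer = nn.AdamWeightDecayDynamicLR(net.trainable_parameters(),
                                        decay_steps=1000,
                                        warmup_steps=100,          # warm up lr for the first 100 steps
                                        learning_rate=0.001,       # initial lr
                                        end_learning_rate=0.0001,  # lr reached after decay_steps
                                        power=10.0,                # power of the polynomial decay
                                        weight_decay=0.01)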
@@ -353,6 +354,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
     def __init__(self,
                  params,
                  decay_steps,
+                 warmup_steps=0,
                  learning_rate=0.001,
                  end_learning_rate=0.0001,
                  power=10.0,
@@ -360,8 +362,7 @@ class AdamWeightDecayDynamicLR(Optimizer):
                  beta2=0.999,
                  eps=1e-6,
                  weight_decay=0.0,
-                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name,
-                 warmup_steps=0):
+                 decay_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name):
         super(AdamWeightDecayDynamicLR, self).__init__(learning_rate, params)
         if self.is_group:
             raise RuntimeError(f"The {self.cls_name} optimizer cannot support group setting.")
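
The default decay_filter kept above applies weight decay only to parameters whose name contains neither 'beta' nor 'gamma' (typically excluding LayerNorm/BatchNorm parameters in MindSpore models). A small self-contained sketch of that predicate; FakeParam is a hypothetical stand-in for mindspore.Parameter, used only so the snippet runs on its own:

# Same predicate as the default decay_filter in the signature above.
decay_filter = lambda x: 'beta' not in x.name and 'gamma' not in x.name

class FakeParam:
    """Hypothetical stand-in for mindspore.Parameter (only the name attribute matters here)."""
    def __init__(self, name):
        self.name = name

print(decay_filter(FakeParam("dense.weight")))     # True  -> weight decay is applied
print(decay_filter(FakeParam("layernorm.gamma")))  # False -> excluded from weight decay
print(decay_filter(FakeParam("layernorm.beta")))   # False -> excluded from weight decay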