!1958 Fix some description to API about optimizer.

Merge pull request !1958 from liuxiao/fix-for-issuse
mindspore-ci-bot 5 years ago committed by Gitee
commit 553432c968

@ -162,13 +162,14 @@ class Adam(Optimizer):
in the value of 'order_params' but not in any group will use default learning rate and default weight
learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
Iterable or a Tensor and the dims of the Tensor is 1,
use dynamic learning rate, then the i-th step will
take the i-th value as the learning rate.
When the learning_rate is float or learning_rate is a Tensor
but the dims of the Tensor is 0, use fixed learning rate.
Other cases are not supported. Default: 1e-3.
When the learning_rate is a float, or a Tensor whose
number of dims is 0, a fixed learning rate is used.
Other cases are not supported. The learning rate should be
equal to or greater than 0. Default: 1e-3.
beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). Default:
beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). Default:
@ -181,7 +182,7 @@ class Adam(Optimizer):
use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
If True, updates the gradients using NAG.
If False, updates the gradients without using NAG. Default: False.
weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0.

@ -143,10 +143,12 @@ class Lamb(Optimizer):
params (list[Parameter]): A list of parameters, which will be updated. Each element in `params`
should be of class mindspore.Parameter.
decay_steps (int): The steps of the lr decay. Should be equal to or greater than 1.
warmup_steps (int): The steps of lr warm up. Default: 0.
start_learning_rate (float): A floating point value for the learning rate. Default: 0.1.
end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001.
power (float): The power of the polynomial. Default: 1.0.
warmup_steps (int): The steps of lr warm up. Should be equal to or greater than 0. Default: 0.
start_learning_rate (float): A floating point value for the learning rate. Should be equal to
or greater than 0. Default: 0.1.
end_learning_rate (float): A floating point value for the end learning rate. Should be equal to
or greater than 0. Default: 0.0001.
power (float): The power of the polynomial. It must be positive. Default: 1.0.
beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9.
Should be in range (0.0, 1.0).
beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999.

@ -59,13 +59,13 @@ class LARS(Optimizer):
optimizer (Optimizer): MindSpore optimizer for which to wrap and modify gradients.
epsilon (float): Term added to the denominator to improve numerical stability. Default: 1e-05.
hyperpara (float): Trust coefficient for calculating the local learning rate. Default: 0.001.
weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
use_clip (bool): Whether to use clip operation for calculating the local learning rate. Default: False.
decay_filter (Function): A function to determine whether to apply weight decay on parameters. Default:
lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
lars_filter (Function): A function to determine whether to apply the LARS algorithm. Default:
lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
loss_scale (float): A floating point value for the loss scale. Default: 1.0.
loss_scale (float): A floating point value for the loss scale. It should be greater than 0. Default: 1.0.
- **gradients** (tuple[Tensor]) - The gradients of `params` in optimizer, the shape is

@ -73,10 +73,11 @@ class SGD(Optimizer):
take the i-th value as the learning rate.
When the learning_rate is float or learning_rate is a Tensor
but the dims of the Tensor is 0, use fixed learning rate.
Other cases are not supported. Default: 0.1.
momentum (float): A floating point value the momentum. Default: 0.0.
dampening (float): A floating point value of dampening for momentum. Default: 0.0.
weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
Other cases are not supported. It should be equal to or
greater than 0. Default: 0.1.
momentum (float): A floating point value of the momentum. Should be at least 0.0. Default: 0.0.
dampening (float): A floating point value of dampening for momentum. Should be at least 0.0. Default: 0.0.
weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
nesterov (bool): Enables the Nesterov momentum. Default: False.
loss_scale (float): A floating point value for the loss scale, which should be larger
than 0.0. Default: 1.0.

@ -3465,7 +3465,7 @@ class SparseApplyFtrl(PrimitiveWithInfer):
validator.check_value_type("l1", l1, [float], self.name)
validator.check_value_type("l2", l2, [float], self.name)
validator.check_value_type("lr_power", lr_power, [float], self.name)
self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_LEFT, self.name)
self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_NEITHER, self.name)
self.l1 = validator.check_number("l1", l1, 0.0, Rel.GE, self.name)
self.l2 = validator.check_number("l2", l2, 0.0, Rel.GE, self.name)
self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
@ -3656,7 +3656,7 @@ class CTCLoss(PrimitiveWithInfer):
def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False,
def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=True,
self.init_prim_io_names(inputs=["inputs", "labels_indices", "labels_values", "sequence_length"],
outputs=["loss", "gradient"])
