From 52790b74e685da24ea25fc47351da08591f581cf Mon Sep 17 00:00:00 2001
From: liuxiao <liuxiao93@huawei.com>
Date: Tue, 9 Jun 2020 21:01:41 +0800
Subject: [PATCH] Add value-range descriptions to optimizer API docs; make SparseApplyFtrl lr strictly positive and default CTCLoss ctc_merge_repeated to True.

---
 mindspore/nn/optim/adam.py         | 17 +++++++++--------
 mindspore/nn/optim/lamb.py         | 10 ++++++----
 mindspore/nn/optim/lars.py         |  4 ++--
 mindspore/nn/optim/sgd.py          |  9 +++++----
 mindspore/ops/operations/nn_ops.py |  4 ++--
 5 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py
index 40237a22d7..8844fc415e 100755
--- a/mindspore/nn/optim/adam.py
+++ b/mindspore/nn/optim/adam.py
@@ -162,13 +162,14 @@ class Adam(Optimizer):
               in the value of 'order_params' but not in any group will use default learning rate and default weight
               decay.
 
-        learning_rate (Union[float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
-                                                        Iterable or a Tensor and the dims of the Tensor is 1,
-                                                        use dynamic learning rate, then the i-th step will
-                                                        take the i-th value as the learning rate.
-                                                        When the learning_rate is float or learning_rate is a Tensor
-                                                        but the dims of the Tensor is 0, use fixed learning rate.
-                                                        Other cases are not supported. Default: 1e-3.
+        learning_rate (Union[int, float, Tensor, Iterable]): A value for the learning rate. When the learning_rate is
+                                                             Iterable or a Tensor and the dims of the Tensor is 1,
+                                                             use dynamic learning rate, then the i-th step will
+                                                             take the i-th value as the learning rate.
+                                                             When the learning_rate is float or learning_rate is a
+                                                             Tensor but the dims of the Tensor is 0, use fixed learning
+                                                             rate. Other cases are not supported. It should be equal to
+                                                             or greater than 0. Default: 1e-3.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Should be in range (0.0, 1.0). Default:
                        0.9.
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Should be in range (0.0, 1.0). Default:
@@ -181,7 +182,7 @@ class Adam(Optimizer):
         use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients.
             If True, updates the gradients using NAG.
             If False, updates the gradients without using NAG. Default: False.
-        weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
+        weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
         loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0.
 
     Inputs:
diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py
index d8cc5b4ce4..83a299b742 100755
--- a/mindspore/nn/optim/lamb.py
+++ b/mindspore/nn/optim/lamb.py
@@ -143,10 +143,12 @@ class Lamb(Optimizer):
         params (list[Parameter]): A list of parameter, which will be updated. The element in `params`
                                   should be class mindspore.Parameter.
         decay_steps (int): The steps of the lr decay. Should be equal to or greater than 1.
-        warmup_steps (int): The steps of lr warm up. Default: 0.
-        start_learning_rate (float): A floating point value for the learning rate. Default: 0.1.
-        end_learning_rate (float): A floating point value for the end learning rate. Default: 0.0001.
-        power (float): The power of the polynomial. Default: 1.0.
+        warmup_steps (int): The steps of lr warm up. Should be equal to or greater than 0. Default: 0.
+        start_learning_rate (float): A floating point value for the learning rate. Should be equal to
+            or greater than 0. Default: 0.1.
+        end_learning_rate (float): A floating point value for the end learning rate. Should be equal to
+            or greater than 0. Default: 0.0001.
+        power (float): The power of the polynomial. It must be positive. Default: 1.0.
         beta1 (float): The exponential decay rate for the 1st moment estimates. Default: 0.9.
             Should be in range (0.0, 1.0).
         beta2 (float): The exponential decay rate for the 2nd moment estimates. Default: 0.999.
diff --git a/mindspore/nn/optim/lars.py b/mindspore/nn/optim/lars.py
index 001a578ffe..ca554788a4 100755
--- a/mindspore/nn/optim/lars.py
+++ b/mindspore/nn/optim/lars.py
@@ -59,13 +59,13 @@ class LARS(Optimizer):
         optimizer (Optimizer): MindSpore optimizer for which to wrap and modify gradients.
         epsilon (float): Term added to the denominator to improve numerical stability. Default: 1e-05.
         hyperpara (float): Trust coefficient for calculating the local learning rate. Default: 0.001.
-        weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
+        weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
         use_clip (bool): Whether to use clip operation for calculating the local learning rate. Default: False.
         decay_filter (Function): A function to determine whether apply weight decay on parameters. Default:
                                  lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
         lars_filter (Function): A function to determine whether apply lars algorithm. Default:
                                 lambda x: 'LayerNorm' not in x.name and 'bias' not in x.name.
-        loss_scale (float): A floating point value for the loss scale. Default: 1.0.
+        loss_scale (float): A floating point value for the loss scale. It should be greater than 0. Default: 1.0.
 
     Inputs:
         - **gradients** (tuple[Tensor]) - The gradients of `params` in optimizer, the shape is
diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py
index a7493400f8..d138fea529 100755
--- a/mindspore/nn/optim/sgd.py
+++ b/mindspore/nn/optim/sgd.py
@@ -73,10 +73,11 @@ class SGD(Optimizer):
                                                         take the i-th value as the learning rate.
                                                         When the learning_rate is float or learning_rate is a Tensor
                                                         but the dims of the Tensor is 0, use fixed learning rate.
-                                                        Other cases are not supported. Default: 0.1.
-        momentum (float): A floating point value the momentum. Default: 0.0.
-        dampening (float): A floating point value of dampening for momentum. Default: 0.0.
-        weight_decay (float): Weight decay (L2 penalty). Default: 0.0.
+                                                        Other cases are not supported. It should be equal to or
+                                                        greater than 0. Default: 0.1.
+        momentum (float): A floating point value of the momentum. Should be at least 0.0. Default: 0.0.
+        dampening (float): A floating point value of dampening for momentum. Should be at least 0.0. Default: 0.0.
+        weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0.
         nesterov (bool): Enables the Nesterov momentum. Default: False.
         loss_scale (float): A floating point value for the loss scale, which should be larger
                             than 0.0. Default: 1.0.
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 91b09d2553..f3e1392553 100644
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -3159,7 +3159,7 @@ class SparseApplyFtrl(PrimitiveWithInfer):
         validator.check_value_type("l1", l1, [float], self.name)
         validator.check_value_type("l2", l2, [float], self.name)
         validator.check_value_type("lr_power", lr_power, [float], self.name)
-        self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_LEFT, self.name)
+        self.lr = validator.check_number_range("lr", lr, 0.0, float("inf"), Rel.INC_NEITHER, self.name)
         self.l1 = validator.check_number("l1", l1, 0.0, Rel.GE, self.name)
         self.l2 = validator.check_number("l2", l2, 0.0, Rel.GE, self.name)
         self.lr_power = validator.check_number("lr_power", lr_power, 0, Rel.LE, self.name)
@@ -3350,7 +3350,7 @@ class CTCLoss(PrimitiveWithInfer):
     """
 
     @prim_attr_register
-    def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=False,
+    def __init__(self, preprocess_collapse_repeated=False, ctc_merge_repeated=True,
                  ignore_longer_outputs_than_inputs=False):
         self.init_prim_io_names(inputs=["inputs", "labels_indices", "labels_values", "sequence_length"],
                                 outputs=["loss", "gradient"])