From 2d7b93e9583690fd9f68c2fc96b531e7f83a9365 Mon Sep 17 00:00:00 2001 From: panfengfeng Date: Fri, 18 Sep 2020 22:28:03 +0800 Subject: [PATCH] fix nn & operations api comments --- mindspore/nn/cell.py | 8 +- mindspore/nn/dynamic_lr.py | 8 +- mindspore/nn/graph_kernels/graph_kernels.py | 14 +- mindspore/nn/layer/basic.py | 14 +- mindspore/nn/layer/conv.py | 24 +- mindspore/nn/layer/embedding.py | 12 +- mindspore/nn/layer/image.py | 12 +- mindspore/nn/layer/lstm.py | 4 +- mindspore/nn/layer/math.py | 12 +- mindspore/nn/layer/pooling.py | 2 +- mindspore/nn/layer/quant.py | 30 +- mindspore/nn/learning_rate_schedule.py | 12 +- mindspore/nn/loss/loss.py | 10 +- mindspore/nn/metrics/_evaluation.py | 6 +- mindspore/nn/metrics/loss.py | 2 +- mindspore/nn/metrics/metric.py | 6 +- mindspore/nn/metrics/precision.py | 2 +- mindspore/nn/metrics/recall.py | 2 +- mindspore/nn/optim/adam.py | 24 +- mindspore/nn/optim/ftrl.py | 14 +- mindspore/nn/optim/lamb.py | 10 +- mindspore/nn/optim/lazyadam.py | 10 +- mindspore/nn/optim/momentum.py | 16 +- mindspore/nn/optim/optimizer.py | 16 +- mindspore/nn/optim/proximal_ada_grad.py | 14 +- mindspore/nn/optim/rmsprop.py | 15 +- mindspore/nn/optim/sgd.py | 18 +- mindspore/nn/probability/bijector/bijector.py | 2 +- .../bnn_layers/conv_variational.py | 16 +- .../bnn_layers/dense_variational.py | 8 +- .../nn/probability/distribution/bernoulli.py | 2 +- .../probability/distribution/categorical.py | 2 +- .../probability/distribution/distribution.py | 4 +- .../probability/distribution/exponential.py | 10 +- .../nn/probability/distribution/geometric.py | 2 +- .../nn/probability/distribution/normal.py | 4 +- .../nn/probability/distribution/uniform.py | 4 +- mindspore/nn/probability/dpn/vae/cvae.py | 6 +- mindspore/nn/probability/dpn/vae/vae.py | 6 +- .../toolbox/uncertainty_evaluation.py | 10 +- .../probability/transforms/transform_bnn.py | 6 +- mindspore/nn/wrap/cell_wrapper.py | 2 +- mindspore/nn/wrap/grad_reducer.py | 2 +- mindspore/nn/wrap/loss_scale.py | 8 +- mindspore/ops/operations/_grad_ops.py | 76 ++--- mindspore/ops/operations/_inner_ops.py | 30 +- mindspore/ops/operations/_quant_ops.py | 64 ++-- mindspore/ops/operations/_thor_ops.py | 42 +-- mindspore/ops/operations/array_ops.py | 252 +++++++------- mindspore/ops/operations/comm_ops.py | 4 +- mindspore/ops/operations/control_ops.py | 10 +- mindspore/ops/operations/debug_ops.py | 32 +- mindspore/ops/operations/image_ops.py | 4 +- mindspore/ops/operations/math_ops.py | 172 +++++----- mindspore/ops/operations/nn_ops.py | 307 +++++++++--------- mindspore/ops/operations/other_ops.py | 40 +-- mindspore/ops/operations/random_ops.py | 54 +-- mindspore/ops/operations/sparse_ops.py | 4 +- 58 files changed, 753 insertions(+), 749 deletions(-) diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index 76f7f13a5c..e509815587 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -614,7 +614,7 @@ class Cell(Cell_): """ Defines the computation to be performed. - This method should be overridden by all subclasses. + This method must be overridden by all subclasses. Note: The inputs of the top cell only allow Tensor. @@ -748,7 +748,7 @@ class Cell(Cell_): Yields parameters of this cell. If `expand` is True, yield parameters of this cell and all subcells. Args: - expand (bool): If True, yields parameters of this cell and all subcells. Otherwise, only yield parameters + expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters that are direct members of this cell. 
Default: True. Examples: @@ -775,7 +775,7 @@ class Cell(Cell_): Args: name_prefix (str): Namespace. Default: ''. - expand (bool): If True, yields parameters of this cell and all subcells. Otherwise, only yield parameters + expand (bool): If true, yields parameters of this cell and all subcells. Otherwise, only yield parameters that are direct members of this cell. Default: True. Examples: @@ -990,7 +990,7 @@ class Cell(Cell_): Set the cell backward hook function. Note that this function is only supported in Pynative Mode. Note: - fn should be defined as the following code. `cell_name` is the name of registered cell. + fn must be defined as the following code. `cell_name` is the name of registered cell. `grad_input` is gradient passed to the cell. `grad_output` is the gradient computed and passed to the next cell or primitve, which may be modified and returned. >>> hook_fn(cell_name, grad_input, grad_output) -> Tensor or None diff --git a/mindspore/nn/dynamic_lr.py b/mindspore/nn/dynamic_lr.py index 81f46029e6..98a72c444c 100644 --- a/mindspore/nn/dynamic_lr.py +++ b/mindspore/nn/dynamic_lr.py @@ -90,7 +90,7 @@ def exponential_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, total_step (int): The total number of steps. step_per_epoch (int): The number of steps in per epoch. decay_epoch (int): A value used to calculate decayed learning rate. - is_stair (bool): If True, learning rate is decayed once every `decay_epoch` times. Default: False. + is_stair (bool): If true, learning rate is decayed once every `decay_epoch` times. Default: False. Returns: list[float]. The size of list is `total_step`. @@ -132,7 +132,7 @@ def natural_exp_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, total_step (int): The total number of steps. step_per_epoch (int): The number of steps in per epoch. decay_epoch (int): A value used to calculate decayed learning rate. - is_stair (bool): If True, learning rate is decayed once every `decay_epoch` times. Default: False. + is_stair (bool): If true, learning rate is decayed once every `decay_epoch` times. Default: False. Returns: list[float]. The size of list is `total_step`. @@ -175,7 +175,7 @@ def inverse_decay_lr(learning_rate, decay_rate, total_step, step_per_epoch, deca total_step (int): The total number of steps. step_per_epoch (int): The number of steps in per epoch. decay_epoch (int): A value used to calculate decayed learning rate. - is_stair (bool): If True, learning rate is decayed once every `decay_epoch` times. Default: False. + is_stair (bool): If true, learning rate is decayed once every `decay_epoch` times. Default: False. Returns: list[float]. The size of list is `total_step`. @@ -283,7 +283,7 @@ def polynomial_decay_lr(learning_rate, end_learning_rate, total_step, step_per_e total_step (int): The total number of steps. step_per_epoch (int): The number of steps in per epoch. decay_epoch (int): A value used to calculate decayed learning rate. - power (float): A value used to calculate decayed learning rate. This parameter should be greater than 0. + power (float): A value used to calculate decayed learning rate. This parameter must be greater than 0. update_decay_epoch (bool): If true, update `decay_epoch`. Default: False. 
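For context only (not part of the patch), a minimal sketch of calling polynomial_decay_lr with the arguments documented just above; the values are illustrative and the printed numbers are approximate:

    from mindspore.nn.dynamic_lr import polynomial_decay_lr

    lr = polynomial_decay_lr(learning_rate=0.1, end_learning_rate=0.01,
                             total_step=6, step_per_epoch=2, decay_epoch=2,
                             power=0.5)
    # One value per step, decayed once per epoch:
    # roughly [0.1, 0.1, 0.0736, 0.0736, 0.01, 0.01]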
Returns: diff --git a/mindspore/nn/graph_kernels/graph_kernels.py b/mindspore/nn/graph_kernels/graph_kernels.py index cd62b1c4a9..c26dcf573d 100644 --- a/mindspore/nn/graph_kernels/graph_kernels.py +++ b/mindspore/nn/graph_kernels/graph_kernels.py @@ -106,11 +106,11 @@ class MinimumGrad(GraphKernel): """ Backprop function for Minimum operator. - Compares x and y elementwise, dout should has the same shape with x and y. + Compares x and y elementwise, dout must has the same shape with x and y. Inputs: - **x** (Tensor) - The first input - - **y** (Tensor) - x and y should have same shape + - **y** (Tensor) - x and y must have same shape - **dout** (Tensor) - Has the same shape as x and y, next operator's backprop output Outputs: @@ -274,7 +274,7 @@ class EqualCount(GraphKernel): """ Computes the number of the same elements of two tensors. - The two input tensors should have the same shape and data type. + The two input tensors must have the same shape and data type. Inputs: x (Tensor): the first input tensor. @@ -309,8 +309,8 @@ class ReduceMean(GraphKernel): The dtype of the tensor to be reduced is number. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. Default: False. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default: False. Inputs: - **input_x** (Tensor[Number]) - The input tensor. @@ -1000,10 +1000,10 @@ class LayerNorm(Cell): normalized_shape (Union(tuple[int], list[int]): The normalization is performed over axis `begin_norm_axis ... R - 1`. begin_norm_axis (int): It first normalization dimension: normalization will be performed along dimensions - `begin_norm_axis: rank(inputs)`, the value should be in [-1, rank(input)). Default: -1. + `begin_norm_axis: rank(inputs)`, the value must be in [-1, rank(input)). Default: -1. begin_params_axis (int): The first parameter(beta, gamma)dimension: scale and centering parameters will have dimensions `begin_params_axis: rank(inputs)` and will be broadcast with - the normalized inputs accordingly, the value should be in [-1, rank(input)). Default: -1. + the normalized inputs accordingly, the value must be in [-1, rank(input)). Default: -1. gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight. The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform', 'he_uniform', etc. Default: 'ones'. diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index df33f1b26b..a87d93c326 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -279,7 +279,7 @@ class ClipByNorm(Cell): where :math:`L_2(X)` is the :math:`L_2`-norm of :math:`X`. Inputs: - - **input** (Tensor) - Tensor of shape N-D. The type should be float32 or float16. + - **input** (Tensor) - Tensor of shape N-D. The type must be float32 or float16. - **clip_norm** (Tensor) - A scalar Tensor of shape :math:`()` or :math:`(1)`. Outputs: @@ -336,7 +336,7 @@ class Norm(Cell): Args: axis (Union[tuple, int]): The axis over which to compute vector norms. Default: (). - keep_dims (bool): If True, the axis indicated in `axis` are kept with size 1. Otherwise, + keep_dims (bool): If true, the axis indicated in `axis` are kept with size 1. Otherwise, the dimensions in `axis` are removed from the output shape. Default: False. Inputs: @@ -507,12 +507,12 @@ class Unfold(Cell): The input tensor must be a 4-D tensor and the data format is NCHW. 
Args: - ksizes (Union[tuple[int], list[int]]): The size of sliding window, should be a tuple or a list of integers, + ksizes (Union[tuple[int], list[int]]): The size of sliding window, must be a tuple or a list of integers, and the format is [1, ksize_row, ksize_col, 1]. strides (Union[tuple[int], list[int]]): Distance between the centers of the two consecutive patches, - should be a tuple or list of int, and the format is [1, stride_row, stride_col, 1]. + must be a tuple or list of int, and the format is [1, stride_row, stride_col, 1]. rates (Union[tuple[int], list[int]]): In each extracted patch, the gap between the corresponding dimension - pixel positions, should be a tuple or a list of integers, and the format is [1, rate_row, rate_col, 1]. + pixel positions, must be a tuple or a list of integers, and the format is [1, rate_row, rate_col, 1]. padding (str): The type of padding algorithm, is a string whose value is "same" or "valid", not case sensitive. Default: "valid". @@ -575,7 +575,7 @@ class MatrixDiag(Cell): float32, float16, int32, int8, and uint8. Outputs: - Tensor, has the same type as input `x`. The shape should be x.shape + (x.shape[-1], ). + Tensor, has the same type as input `x`. The shape must be x.shape + (x.shape[-1], ). Examples: >>> x = Tensor(np.array([1, -1]), mstype.float32) @@ -606,7 +606,7 @@ class MatrixDiagPart(Cell): float32, float16, int32, int8, and uint8. Outputs: - Tensor, has the same type as input `x`. The shape should be x.shape[:-2] + [min(x.shape[-2:])]. + Tensor, has the same type as input `x`. The shape must be x.shape[:-2] + [min(x.shape[-2:])]. Examples: >>> x = Tensor([[[-1, 0], [0, 1]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) diff --git a/mindspore/nn/layer/conv.py b/mindspore/nn/layer/conv.py index 1664422e53..87bb17fe43 100644 --- a/mindspore/nn/layer/conv.py +++ b/mindspore/nn/layer/conv.py @@ -160,7 +160,7 @@ class Conv2d(_Conv): must be 0. - pad: Implicit paddings on both sides of the input. The number of `padding` will be padded to the input - Tensor borders. `padding` should be greater than or equal to 0. + Tensor borders. `padding` must be greater than or equal to 0. padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. If `padding` is one integer, the paddings of top, bottom, left and right are the same, equal to padding. If `padding` is a tuple @@ -168,10 +168,10 @@ class Conv2d(_Conv): padding[1], padding[2], and padding[3] accordingly. Default: 0. dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, there will - be :math:`k - 1` pixels skipped for each sampling location. Its value should + be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater or equal to 1 and bounded by the height and width of the input. Default: 1. - group (int): Split filter into groups, `in_ channels` and `out_channels` should be + group (int): Splits filter into groups, `in_ channels` and `out_channels` must be divisible by the number of groups. If the group is equal to `in_channels` and `out_channels`, this 2D convolution layer also can be called 2D depthwise convolution layer. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. 
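For context only (not part of the patch), a minimal sketch of constructing the Conv2d layer whose arguments are documented in the hunk above; the shapes are illustrative, and with pad_mode='same' and stride 1 the spatial size is preserved:

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    net = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3,
                    stride=1, pad_mode='same', padding=0, dilation=1,
                    group=1, has_bias=False)
    x = Tensor(np.ones([1, 3, 32, 32]).astype(np.float32))   # NCHW input
    print(net(x).shape)                                      # (1, 16, 32, 32)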
@@ -239,7 +239,7 @@ class Conv2d(_Conv): self.bias_add = P.BiasAdd() def _init_depthwise_conv2d(self): - """Init depthwise conv2d op""" + """Initialize depthwise conv2d op""" if context.get_context("device_target") == "Ascend" and self.group > 1: self.dilation = self._dilation validator.check_integer('group', self.group, self.in_channels, Rel.EQ) @@ -335,15 +335,15 @@ class Conv1d(_Conv): must be 0. - pad: Implicit paddings on both sides of the input. The number of `padding` will be padded to the input - Tensor borders. `padding` should be greater than or equal to 0. + Tensor borders. `padding` must be greater than or equal to 0. padding (int): Implicit paddings on both sides of the input. Default: 0. dilation (int): The data type is int. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, there will - be :math:`k - 1` pixels skipped for each sampling location. Its value should + be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater or equal to 1 and bounded by the height and width of the input. Default: 1. - group (int): Split filter into groups, `in_ channels` and `out_channels` should be + group (int): Splits filter into groups, `in_ channels` and `out_channels` must be divisible by the number of groups. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. weight_init (Union[Tensor, str, Initializer, numbers.Number]): An initializer for the convolution kernel. @@ -481,7 +481,7 @@ class Conv2dTranspose(_Conv): width of the kernel. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents the height and width of movement are both strides, or a tuple of two int numbers that - represent height and width of movement respectively. Its value should be equal to or greater than 1. + represent height and width of movement respectively. Its value must be equal to or greater than 1. Default: 1. pad_mode (str): Select the mode of the pad. The optional values are "pad", "same", "valid". Default: "same". @@ -497,10 +497,10 @@ class Conv2dTranspose(_Conv): padding[1], padding[2], and padding[3] accordingly. Default: 0. dilation (Union[int, tuple[int]]): The data type is int or a tuple of 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, there will - be :math:`k - 1` pixels skipped for each sampling location. Its value should + be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater than or equal to 1 and bounded by the height and width of the input. Default: 1. - group (int): Split filter into groups, `in_channels` and `out_channels` should be + group (int): Splits filter into groups, `in_channels` and `out_channels` must be divisible by the number of groups. This does not support for Davinci devices when group > 1. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. @@ -662,10 +662,10 @@ class Conv1dTranspose(_Conv): padding (int): Implicit paddings on both sides of the input. Default: 0. dilation (int): The data type is int. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, there will - be :math:`k - 1` pixels skipped for each sampling location. Its value should + be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater or equal to 1 and bounded by the width of the input. 
Default: 1. - group (int): Split filter into groups, `in_channels` and `out_channels` should be + group (int): Splits filter into groups, `in_channels` and `out_channels` must be divisible by the number of groups. This is not support for Davinci devices when group > 1. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. diff --git a/mindspore/nn/layer/embedding.py b/mindspore/nn/layer/embedding.py index d148c68567..293d0d5291 100755 --- a/mindspore/nn/layer/embedding.py +++ b/mindspore/nn/layer/embedding.py @@ -36,7 +36,7 @@ class Embedding(Cell): the corresponding word embeddings. Note: - When 'use_one_hot' is set to True, the type of the input should be mindspore.int32. + When 'use_one_hot' is set to True, the type of the input must be mindspore.int32. Args: vocab_size (int): Size of the dictionary of embeddings. @@ -49,7 +49,7 @@ class Embedding(Cell): Inputs: - **input** (Tensor) - Tensor of shape :math:`(\text{batch_size}, \text{input_length})`. The elements of - the Tensor should be integer and not larger than vocab_size. Otherwise the corresponding embedding vector will + the Tensor must be integer and not larger than vocab_size. Otherwise the corresponding embedding vector will be zero. Outputs: @@ -120,7 +120,7 @@ class EmbeddingLookup(Cell): specified 'offset = 0' to lookup table. When 'target' is set to 'DEVICE', this module will use P.GatherV2() which specified 'axis = 0' to lookup table. - In field slice mode, the manual_shapes should be given. It is a tuple ,where + In field slice mode, the manual_shapes must be given. It is a tuple ,where the element is vocab[i], vocab[i] is the row numbers for i-th part. @@ -128,16 +128,16 @@ class EmbeddingLookup(Cell): vocab_size (int): Size of the dictionary of embeddings. embedding_size (int): The size of each embedding vector. param_init (str): The initialize way of embedding table. Default: 'normal'. - target (str): Specify the target where the op is executed. The value should in + target (str): Specifies the target where the op is executed. The value must in ['DEVICE', 'CPU']. Default: 'CPU'. - slice_mode (str): The slicing way in semi_auto_parallel/auto_parallel. The value should get through + slice_mode (str): The slicing way in semi_auto_parallel/auto_parallel. The value must get through nn.EmbeddingLookup. Default: nn.EmbeddingLookup.BATCH_SLICE. manual_shapes (tuple): The accompaniment array in field slice mode. Inputs: - **input_indices** (Tensor) - The shape of tensor is :math:`(y_1, y_2, ..., y_S)`. Specifies the indices of elements of the original Tensor. Values can be out of range of embedding_table, - and the exceeding part will be filled with 0 in the output. Input_indices should only be a 2d tensor in + and the exceeding part will be filled with 0 in the output. Input_indices must only be a 2d tensor in this interface. Outputs: diff --git a/mindspore/nn/layer/image.py b/mindspore/nn/layer/image.py index e807c0cf0f..ea9964ea5b 100644 --- a/mindspore/nn/layer/image.py +++ b/mindspore/nn/layer/image.py @@ -193,8 +193,8 @@ class SSIM(Cell): k2 (float): The constant used to generate c2 in the contrast comparison function. Default: 0.03. Inputs: - - **img1** (Tensor) - The first image batch with format 'NCHW'. It should be the same shape and dtype as img2. - - **img2** (Tensor) - The second image batch with format 'NCHW'. It should be the same shape and dtype as img1. 
+ - **img1** (Tensor) - The first image batch with format 'NCHW'. It must be the same shape and dtype as img2. + - **img2** (Tensor) - The second image batch with format 'NCHW'. It must be the same shape and dtype as img1. Outputs: Tensor, has the same dtype as img1. It is a 1-D tensor with shape N, where N is the batch num of img1. @@ -267,8 +267,8 @@ class MSSSIM(Cell): k2 (float): The constant used to generate c2 in the contrast comparison function. Default: 0.03. Inputs: - - **img1** (Tensor) - The first image batch with format 'NCHW'. It should be the same shape and dtype as img2. - - **img2** (Tensor) - The second image batch with format 'NCHW'. It should be the same shape and dtype as img1. + - **img1** (Tensor) - The first image batch with format 'NCHW'. It must be the same shape and dtype as img2. + - **img2** (Tensor) - The second image batch with format 'NCHW'. It must be the same shape and dtype as img1. Outputs: Tensor, has the same dtype as img1. It is a 1-D tensor with shape N, where N is the batch num of img1. @@ -352,8 +352,8 @@ class PSNR(Cell): Default: 1.0. Inputs: - - **img1** (Tensor) - The first image batch with format 'NCHW'. It should be the same shape and dtype as img2. - - **img2** (Tensor) - The second image batch with format 'NCHW'. It should be the same shape and dtype as img1. + - **img1** (Tensor) - The first image batch with format 'NCHW'. It must be the same shape and dtype as img2. + - **img2** (Tensor) - The second image batch with format 'NCHW'. It must be the same shape and dtype as img1. Outputs: Tensor, with dtype mindspore.float32. It is a 1-D tensor with shape N, where N is the batch num of img1. diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py index 6f68e3dd6c..9362dbc426 100755 --- a/mindspore/nn/layer/lstm.py +++ b/mindspore/nn/layer/lstm.py @@ -78,7 +78,7 @@ class LSTM(Cell): - **input** (Tensor) - Tensor of shape (seq_len, batch_size, `input_size`). - **hx** (tuple) - A tuple of two Tensors (h_0, c_0) both of data type mindspore.float32 or mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`). - Data type of `hx` should be the same as `input`. + Data type of `hx` must be the same as `input`. Outputs: Tuple, a tuple constains (`output`, (`h_n`, `c_n`)). @@ -208,7 +208,7 @@ class LSTMCell(Cell): mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`). - **c** - data type mindspore.float32 or mindspore.float16 and shape (num_directions * `num_layers`, batch_size, `hidden_size`). - Data type of `h' and 'c' should be the same of `input`. + Data type of `h' and 'c' must be the same of `input`. Outputs: `output`, `h_n`, `c_n`, 'reserve', 'state'. diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py index 87b5e79a3d..135bff22ad 100644 --- a/mindspore/nn/layer/math.py +++ b/mindspore/nn/layer/math.py @@ -36,8 +36,8 @@ class ReduceLogSumExp(Cell): The dtype of the tensor to be reduced is number. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default : False. Inputs: @@ -357,16 +357,16 @@ class MatMul(Cell): will be broadcasted and must be broadcastable. Args: - transpose_x1 (bool): If True, `a` is transposed before multiplication. Default: False. - transpose_x2 (bool): If True, `b` is transposed before multiplication. Default: False. 
+ transpose_x1 (bool): If true, `a` is transposed before multiplication. Default: False. + transpose_x2 (bool): If true, `b` is transposed before multiplication. Default: False. Inputs: - **input_x1** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(*A, N, C)`, where :math:`*A` represents the batch size of `x1` which can be multidimensional. - If `transpose_a` is True, its shape should be :math:`(*A, N, C)` after transposing. + If `transpose_a` is True, its shape must be :math:`(*A, N, C)` after transposing. - **input_x2** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(*B, C, M)`, where :math:`*B` represents the batch size of `x2` which can be multidimensional. - If `transpose_b` is True, its shape should be :math:`(*B, C, M)` after transposing. + If `transpose_b` is True, its shape must be :math:`(*B, C, M)` after transposing. Outputs: Tensor, the shape of the output tensor is :math:`(*L, N, M)`. :math:`*L` is the batch size after broadcasting. diff --git a/mindspore/nn/layer/pooling.py b/mindspore/nn/layer/pooling.py index 068309430c..cddc7369ed 100644 --- a/mindspore/nn/layer/pooling.py +++ b/mindspore/nn/layer/pooling.py @@ -159,7 +159,7 @@ class AvgPool2d(_PoolNd): Args: kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value. - The data type of kernel_size should be int and the value represents the height and width, + The data type of kernel_size must be int and the value represents the height and width, or a tuple of two int numbers that represent height and width respectively. Default: 1. stride (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py index 6cdcea0a38..b615c6889c 100644 --- a/mindspore/nn/layer/quant.py +++ b/mindspore/nn/layer/quant.py @@ -65,14 +65,14 @@ class Conv2dBnAct(Cell): and width of the 2D convolution window. Single int means the value is for both height and width of the kernel. A tuple of 2 ints means the first value is for the height and the other is for the width of the kernel. - stride (int): Specifies stride for all spatial dimensions with the same value. The value of stride should be + stride (int): Specifies stride for all spatial dimensions with the same value. The value of stride must be greater than or equal to 1 and lower than any one of the height and width of the input. Default: 1. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". padding (int): Implicit paddings on both sides of the input. Default: 0. - dilation (int): Specifying the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, - there will be :math:`k - 1` pixels skipped for each sampling location. Its value should be greater than + dilation (int): Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, + there will be :math:`k - 1` pixels skipped for each sampling location. Its value must be greater than or equal to 1 and lower than any one of the height and width of the input. Default: 1. - group (int): Split filter into groups, `in_ channels` and `out_channels` should be + group (int): Splits filter into groups, `in_ channels` and `out_channels` must be divisible by the number of groups. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. 
weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. @@ -85,14 +85,14 @@ class Conv2dBnAct(Cell): Initializer and string are the same as 'weight_init'. Refer to the values of Initializer for more details. Default: 'zeros'. has_bn (bool): Specifies to used batchnorm or not. Default: False. - momentum (float): Momentum for moving average.Momentum value should be [0, 1].Default:0.9 + momentum (float): Momentum for moving average.Momentum value must be [0, 1].Default:0.9 eps (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default: 1e-5. activation (Cell): Specifies activation type. The optional values are as following: 'softmax', 'logsoftmax', 'relu', 'relu6', 'tanh', 'gelu', 'sigmoid', 'prelu', 'leakyrelu', 'hswish', 'hsigmoid'. Default: None. alpha (float): Slope of the activation function at x < 0. Default: 0.2. - after_fake(bool): Determin whether there should be a fake quantization operation after Cond2dBnAct. + after_fake(bool): Determin whether there must be a fake quantization operation after Cond2dBnAct. Inputs: - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. @@ -176,7 +176,7 @@ class DenseBnAct(Cell): activation (string): Specifies activation type. The optional values are as following: 'Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'PReLU', 'LeakyReLU', 'h-Swish', and 'h-Sigmoid'. Default: None. - after_fake(bool): Determin whether there should be a fake quantization operation after DenseBnAct. + after_fake(bool): Determin whether there must be a fake quantization operation after DenseBnAct. Inputs: - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`. @@ -227,7 +227,7 @@ class BatchNormFoldCell(Cell): Batch normalization folded. Args: - momentum (float): Momentum value should be [0, 1]. Default: 0.9. + momentum (float): Momentum value must be [0, 1]. Default: 0.9. epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in float32 else 1e-3. Default: 1e-5. freeze_bn (int): Delay in steps at which computation switches from regular batch @@ -250,7 +250,7 @@ class BatchNormFoldCell(Cell): """ def __init__(self, momentum=0.9, epsilon=1e-5, freeze_bn=0): - """init batch norm fold layer""" + """Initialize batch norm fold layer""" super(BatchNormFoldCell, self).__init__() self.epsilon = epsilon self.is_gpu = context.get_context('device_target') == "GPU" @@ -323,7 +323,7 @@ class FakeQuantWithMinMax(Cell): symmetric=False, narrow_range=False, quant_delay=0): - """init FakeQuantWithMinMax layer""" + """Initialize FakeQuantWithMinMax layer""" super(FakeQuantWithMinMax, self).__init__() validator.check_type("min_init", min_init, [int, float]) validator.check_type("max_init", max_init, [int, float]) @@ -470,7 +470,7 @@ class Conv2dBnFoldQuant(Cell): narrow_range=False, quant_delay=0, freeze_bn=100000): - """init Conv2dBnFoldQuant layer""" + """Initialize Conv2dBnFoldQuant layer""" super(Conv2dBnFoldQuant, self).__init__() self.in_channels = in_channels self.out_channels = out_channels @@ -611,8 +611,8 @@ class Conv2dBnWithoutFoldQuant(Cell): stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". padding (int): Implicit paddings on both sides of the input. Default: 0. - dilation (int): Specifying the dilation rate to use for dilated convolution. Default: 1. 
- group (int): Split filter into groups, `in_ channels` and `out_channels` should be + dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1. + group (int): Splits filter into groups, `in_ channels` and `out_channels` must be divisible by the number of groups. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. eps (float): Parameters for BatchNormal. Default: 1e-5. @@ -743,8 +743,8 @@ class Conv2dQuant(Cell): stride (int): Specifies stride for all spatial dimensions with the same value. Default: 1. pad_mode (str): Specifies padding mode. The optional values are "same", "valid", "pad". Default: "same". padding (int): Implicit paddings on both sides of the input. Default: 0. - dilation (int): Specifying the dilation rate to use for dilated convolution. Default: 1. - group (int): Split filter into groups, `in_ channels` and `out_channels` should be + dilation (int): Specifies the dilation rate to use for dilated convolution. Default: 1. + group (int): Splits filter into groups, `in_ channels` and `out_channels` must be divisible by the number of groups. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. weight_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the convolution kernel. diff --git a/mindspore/nn/learning_rate_schedule.py b/mindspore/nn/learning_rate_schedule.py index 0210243755..90ecaafcd3 100644 --- a/mindspore/nn/learning_rate_schedule.py +++ b/mindspore/nn/learning_rate_schedule.py @@ -32,10 +32,10 @@ class LearningRateSchedule(Cell): """ Defines the computation to get the current learning rate. - This method should be overridden by all subclasses. + This method must be overridden by all subclasses. Note: - The output should be a Tensor of scalar. + The output must be a Tensor of scalar. Inputs: Tensor. The current step number. @@ -73,7 +73,7 @@ class ExponentialDecayLR(LearningRateSchedule): learning_rate (float): The initial value of learning rate. decay_rate (float): The decay rate. decay_steps (int): A value used to calculate decayed learning rate. - is_stair (bool): If True, learning rate is decayed once every `decay_steps` time. Default: False. + is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False. Inputs: Tensor. The current step number. @@ -127,7 +127,7 @@ class NaturalExpDecayLR(LearningRateSchedule): learning_rate (float): The initial value of learning rate. decay_rate (float): The decay rate. decay_steps (int): A value used to calculate decayed learning rate. - is_stair (bool): If True, learning rate is decayed once every `decay_steps` time. Default: False. + is_stair (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False. Inputs: Tensor. The current step number. @@ -292,8 +292,8 @@ class PolynomialDecayLR(LearningRateSchedule): learning_rate (float): The initial value of learning rate. end_learning_rate (float): The end value of learning rate. decay_steps (int): A value used to calculate decayed learning rate. - power (float): A value used to calculate decayed learning rate. This parameter should be greater than 0. - update_decay_steps (bool): If True, learning rate is decayed once every `decay_steps` time. Default: False. + power (float): A value used to calculate decayed learning rate. This parameter must be greater than 0. + update_decay_steps (bool): If true, learning rate is decayed once every `decay_steps` time. Default: False. Inputs: Tensor. 
The current step number. diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index b3042bfebe..f3833ac56d 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -83,9 +83,9 @@ class L1Loss(_Loss): Default: "mean". Inputs: - - **input_data** (Tensor) - Tensor of shape :math:`(x_1, x_2, ..., x_R)`. The data type should be float16 or + - **input_data** (Tensor) - Tensor of shape :math:`(x_1, x_2, ..., x_R)`. The data type must be float16 or float32. - - **target_data** (Tensor) - Tensor of shape :math:`(y_1, y_2, ..., y_S)`. The data type should be float16 or + - **target_data** (Tensor) - Tensor of shape :math:`(y_1, y_2, ..., y_S)`. The data type must be float16 or float32. Outputs: @@ -344,14 +344,14 @@ class CosineEmbeddingLoss(_Loss): Args: margin (float): Should be in [-1.0, 1.0]. Default 0.0. - reduction (str): Specifies which reduction to be applied to the output. It should be one of + reduction (str): Specifies which reduction to be applied to the output. It must be one of "none", "mean", and "sum", meaning no reduction, reduce mean and sum on output, respectively. Default "mean". Inputs: - **input_x1** (Tensor) - Input tensor. - - **input_x2** (Tensor) - Its shape and data type should be the same as `input_x1`'s shape and data type. + - **input_x2** (Tensor) - Its shape and data type must be the same as `input_x1`'s shape and data type. - **y** (Tensor) - Contains value 1 or -1. Suppose the shape of `input_x1` is - :math:`(x_1, x_2, x_3,..., x_R)`, then the shape of `target` should be :math:`(x_1, x_3, x_4, ..., x_R)`. + :math:`(x_1, x_2, x_3,..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`. Outputs: - **loss** (Tensor) - If `reduction` is "none", its shape is the same as `y`'s shape, otherwise a scalar value diff --git a/mindspore/nn/metrics/_evaluation.py b/mindspore/nn/metrics/_evaluation.py index dc25000751..d9c32bb162 100644 --- a/mindspore/nn/metrics/_evaluation.py +++ b/mindspore/nn/metrics/_evaluation.py @@ -77,7 +77,7 @@ class EvaluationBase(Metric): A interface describes the behavior of clearing the internal evaluation result. Note: - All subclasses should override this interface. + All subclasses must override this interface. """ raise NotImplementedError @@ -86,7 +86,7 @@ class EvaluationBase(Metric): A interface describes the behavior of updating the internal evaluation result. Note: - All subclasses should override this interface. + All subclasses must override this interface. Args: inputs: The first item is predicted array and the second item is target array. @@ -98,6 +98,6 @@ class EvaluationBase(Metric): A interface describes the behavior of computing the evaluation result. Note: - All subclasses should override this interface. + All subclasses must override this interface. """ raise NotImplementedError diff --git a/mindspore/nn/metrics/loss.py b/mindspore/nn/metrics/loss.py index 3828fcdef5..63fd609c28 100644 --- a/mindspore/nn/metrics/loss.py +++ b/mindspore/nn/metrics/loss.py @@ -46,7 +46,7 @@ class Loss(Metric): Args: inputs: Inputs contain only one element, the element is loss. The dimension of - loss should be 0 or 1. + loss must be 0 or 1. Raises: ValueError: If the length of inputs is not 1. 
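For context only (not part of the patch), a minimal sketch of the L1Loss interface described in the hunk above, assuming float32 inputs of the documented shapes:

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    loss = nn.L1Loss(reduction='mean')             # one of 'none', 'mean', 'sum'
    input_data = Tensor(np.array([1, 2, 3]).astype(np.float32))
    target_data = Tensor(np.array([1, 2, 2]).astype(np.float32))
    print(loss(input_data, target_data))           # mean absolute error, about 0.3333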
diff --git a/mindspore/nn/metrics/metric.py b/mindspore/nn/metrics/metric.py index 673403b29e..19c06d2759 100644 --- a/mindspore/nn/metrics/metric.py +++ b/mindspore/nn/metrics/metric.py @@ -85,7 +85,7 @@ class Metric(metaclass=ABCMeta): An interface describes the behavior of clearing the internal evaluation result. Note: - All subclasses should override this interface. + All subclasses must override this interface. """ raise NotImplementedError('Must define clear function to use this base class') @@ -95,7 +95,7 @@ class Metric(metaclass=ABCMeta): An interface describes the behavior of computing the evaluation result. Note: - All subclasses should override this interface. + All subclasses must override this interface. """ raise NotImplementedError('Must define eval function to use this base class') @@ -105,7 +105,7 @@ class Metric(metaclass=ABCMeta): An interface describes the behavior of updating the internal evaluation result. Note: - All subclasses should override this interface. + All subclasses must override this interface. Args: inputs: A variable-length input argument list. diff --git a/mindspore/nn/metrics/precision.py b/mindspore/nn/metrics/precision.py index c0a3f9c12e..096d936a87 100644 --- a/mindspore/nn/metrics/precision.py +++ b/mindspore/nn/metrics/precision.py @@ -34,7 +34,7 @@ class Precision(EvaluationBase): \text{precision} = \frac{\text{true_positive}}{\text{true_positive} + \text{false_positive}} Note: - In the multi-label cases, the elements of :math:`y` and :math:`y_{pred}` should be 0 or 1. + In the multi-label cases, the elements of :math:`y` and :math:`y_{pred}` must be 0 or 1. Args: eval_type (str): Metric to calculate accuracy over a dataset, for classification or diff --git a/mindspore/nn/metrics/recall.py b/mindspore/nn/metrics/recall.py index e0bd7538a3..32657995b3 100644 --- a/mindspore/nn/metrics/recall.py +++ b/mindspore/nn/metrics/recall.py @@ -34,7 +34,7 @@ class Recall(EvaluationBase): \text{recall} = \frac{\text{true_positive}}{\text{true_positive} + \text{false_negative}} Note: - In the multi-label cases, the elements of :math:`y` and :math:`y_{pred}` should be 0 or 1. + In the multi-label cases, the elements of :math:`y` and :math:`y_{pred}` must be 0 or 1. Args: eval_type (str): Metric to calculate the recall over a dataset, for classification or diff --git a/mindspore/nn/optim/adam.py b/mindspore/nn/optim/adam.py index 40cb4b70d6..750b29c562 100755 --- a/mindspore/nn/optim/adam.py +++ b/mindspore/nn/optim/adam.py @@ -166,10 +166,10 @@ class Adam(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" is in the keys, the value of the corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -177,16 +177,16 @@ class Adam(Optimizer): - weight_decay: Optional. If "weight_decay" is in the keys, the value of the corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. 
If "order_params" is in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" is in the keys, the value must be the order of parameters and the order will be followed in the optimizer. There are no other keys in the `dict` and the parameters - which in the 'order_params' should be in one of group parameters. + which in the 'order_params' must be in one of group parameters. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. When the learning_rate is an Iterable or a Tensor in a 1D dimension, use the dynamic learning rate, then the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. Default: 1e-3. beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0). @@ -201,7 +201,7 @@ class Adam(Optimizer): use_nesterov (bool): Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. If true, update the gradients using NAG. If false, update the gradients without using NAG. Default: False. - weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0. loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. Inputs: @@ -290,10 +290,10 @@ class AdamWeightDecay(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" is in the keys, the value of the corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -301,16 +301,16 @@ class AdamWeightDecay(Optimizer): - weight_decay: Optional. If "weight_decay" is in the keys, the value of the corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" is in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" is in the keys, the value must be the order of parameters and the order will be followed in the optimizer. There are no other keys in the `dict` and the parameters - which in the 'order_params' should be in one of group parameters. + which in the 'order_params' must be in one of group parameters. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. 
When the learning_rate is an Iterable or a Tensor in a 1D dimension, use the dynamic learning rate, then the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. Default: 1e-3. beta1 (float): The exponential decay rate for the 1st moment estimations. Default: 0.9. @@ -319,7 +319,7 @@ class AdamWeightDecay(Optimizer): Should be in range (0.0, 1.0). eps (float): Term added to the denominator to improve numerical stability. Default: 1e-6. Should be greater than 0. - weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0. Inputs: - **gradients** (tuple[Tensor]) - The gradients of `params`, the shape is the same as `params`. diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py index 1ede68bb50..8667a0fa2e 100644 --- a/mindspore/nn/optim/ftrl.py +++ b/mindspore/nn/optim/ftrl.py @@ -93,29 +93,29 @@ class FTRL(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Using different learning rate by separating parameters is currently not supported. - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. + in the value of 'order_params' must be in one of group parameters. initial_accum (float): The starting value for accumulators, must be zero or positive values. Default: 0.1. - learning_rate (float): The learning rate value, should be zero or positive, dynamic learning rate is currently + learning_rate (float): The learning rate value, must be zero or positive, dynamic learning rate is currently not supported. Default: 0.001. lr_power (float): Learning rate power controls how the learning rate decreases during training, must be less than or equal to zero. Use fixed learning rate if lr_power is zero. Default: -0.5. l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0. l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0. 
- use_locking (bool): If True, use locks for updating operation. Default: False. - loss_scale (float): Value for the loss scale. It should be equal to or greater than 1.0. Default: 1.0. + use_locking (bool): If true, use locks for updating operation. Default: False. + loss_scale (float): Value for the loss scale. It must be equal to or greater than 1.0. Default: 1.0. weight_decay (float): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. Inputs: diff --git a/mindspore/nn/optim/lamb.py b/mindspore/nn/optim/lamb.py index be02ae1ffd..c8aa14ff3a 100755 --- a/mindspore/nn/optim/lamb.py +++ b/mindspore/nn/optim/lamb.py @@ -199,10 +199,10 @@ class Lamb(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -210,16 +210,16 @@ class Lamb(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. + in the value of 'order_params' must be in one of group parameters. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. When the learning_rate is an Iterable or a Tensor in a 1D dimension, use dynamic learning rate, then the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. beta1 (float): The exponential decay rate for the 1st moment estimations. Default: 0.9. Should be in range (0.0, 1.0). diff --git a/mindspore/nn/optim/lazyadam.py b/mindspore/nn/optim/lazyadam.py index a52e465722..c74d2ab11c 100644 --- a/mindspore/nn/optim/lazyadam.py +++ b/mindspore/nn/optim/lazyadam.py @@ -112,10 +112,10 @@ class LazyAdam(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. 
When the `params` is a list of `dict`, the "params", "lr" and "weight_decay" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -123,16 +123,16 @@ class LazyAdam(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. + in the value of 'order_params' must be in one of group parameters. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. When the learning_rate is an Iterable or a Tensor in a 1D dimension, use dynamic learning rate, then the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. Default: 1e-3. beta1 (float): The exponential decay rate for the 1st moment estimations. Should be in range (0.0, 1.0). diff --git a/mindspore/nn/optim/momentum.py b/mindspore/nn/optim/momentum.py index c22cce2c00..926639b367 100755 --- a/mindspore/nn/optim/momentum.py +++ b/mindspore/nn/optim/momentum.py @@ -68,10 +68,10 @@ class Momentum(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -79,21 +79,21 @@ class Momentum(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. 
+ in the value of 'order_params' must be in one of group parameters. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. When the learning_rate is an Iterable or a Tensor in a 1D dimension, use dynamic learning rate, then the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. momentum (float): Hyperparameter of type float, means momentum for the moving average. - It should be at least 0.0. - weight_decay (int, float): Weight decay (L2 penalty). It should be equal to or greater than 0.0. Default: 0.0. - loss_scale (int, float): A floating point value for the loss scale. It should be greater than 0.0. Default: 1.0. + It must be at least 0.0. + weight_decay (int, float): Weight decay (L2 penalty). It must be equal to or greater than 0.0. Default: 0.0. + loss_scale (int, float): A floating point value for the loss scale. It must be greater than 0.0. Default: 1.0. use_nesterov (bool): Enable Nesterov momentum. Default: False. Inputs: diff --git a/mindspore/nn/optim/optimizer.py b/mindspore/nn/optim/optimizer.py index c60498df21..9dd4dd260e 100755 --- a/mindspore/nn/optim/optimizer.py +++ b/mindspore/nn/optim/optimizer.py @@ -58,13 +58,13 @@ class Optimizer(Cell): the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. parameters (Union[list[Parameter], list[dict]]): When the `parameters` is a list of `Parameter` which will be - updated, the element in `parameters` should be class `Parameter`. When the `parameters` is a list of `dict`, + updated, the element in `parameters` must be class `Parameter`. When the `parameters` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -72,13 +72,13 @@ class Optimizer(Cell): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. 
If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. + in the value of 'order_params' must be in one of group parameters. - weight_decay (float): A floating point value for the weight decay. It should be equal to or greater than 0. + weight_decay (float): A floating point value for the weight decay. It must be equal to or greater than 0. If the type of `weight_decay` input is int, it will be converted to float. Default: 0.0. - loss_scale (float): A floating point value for the loss scale. It should be greater than 0. If the + loss_scale (float): A floating point value for the loss scale. It must be greater than 0. If the type of `loss_scale` input is int, it will be converted to float. Default: 1.0. Raises: @@ -315,7 +315,7 @@ class Optimizer(Cell): raise ValueError("The Tensor type dynamic learning rate in group should be the same size.") def _init_group_params(self, parameters, learning_rate, weight_decay): - """Init learning rate or weight decay in group params.""" + """Initialize learning rate or weight decay in group params.""" self._parse_group_params(parameters, learning_rate) default_lr = self._build_single_lr(learning_rate, 'learning_rate') diff --git a/mindspore/nn/optim/proximal_ada_grad.py b/mindspore/nn/optim/proximal_ada_grad.py index 12287c59d0..4776d304aa 100644 --- a/mindspore/nn/optim/proximal_ada_grad.py +++ b/mindspore/nn/optim/proximal_ada_grad.py @@ -71,10 +71,10 @@ class ProximalAdagrad(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -82,9 +82,9 @@ class ProximalAdagrad(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. + in the value of 'order_params' must be in one of group parameters. accum (float): The starting value for accumulators, must be zero or positive values. Default: 0.1. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. @@ -92,13 +92,13 @@ class ProximalAdagrad(Optimizer): the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. 
When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. Default: 0.001. l1 (float): l1 regularization strength, must be greater than or equal to zero. Default: 0.0. l2 (float): l2 regularization strength, must be greater than or equal to zero. Default: 0.0. - use_locking (bool): If True, use locks for updating operation. Default: False. - loss_scale (float): Value for the loss scale. It should be greater than 0.0. Default: 1.0. + use_locking (bool): If true, use locks for updating operation. Default: False. + loss_scale (float): Value for the loss scale. It must be greater than 0.0. Default: 1.0. weight_decay (float): Weight decay value to multiply weight, must be zero or positive value. Default: 0.0. Inputs: diff --git a/mindspore/nn/optim/rmsprop.py b/mindspore/nn/optim/rmsprop.py index 428c6840a3..57a3b41759 100644 --- a/mindspore/nn/optim/rmsprop.py +++ b/mindspore/nn/optim/rmsprop.py @@ -91,10 +91,10 @@ class RMSProp(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -102,16 +102,16 @@ class RMSProp(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. + in the value of 'order_params' must be in one of group parameters. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. When the learning_rate is an Iterable or a Tensor in a 1D dimension, use dynamic learning rate, then the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. Default: 0.1. decay (float): Decay rate. Should be equal to or greater than 0. Default: 0.9. 
@@ -119,8 +119,9 @@ class RMSProp(Optimizer): greater than 0. Default: 0.0. epsilon (float): Term added to the denominator to improve numerical stability. Should be greater than 0. Default: 1e-10. - use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False. - centered (bool): If True, gradients are normalized by the estimated variance of the gradient. Default: False. + use_locking (bool): Whether to enable a lock to protect the variable and accumlation tensors from being + updated. Default: False. + centered (bool): If true, gradients are normalized by the estimated variance of the gradient. Default: False. loss_scale (float): A floating point value for the loss scale. Should be greater than 0. Default: 1.0. weight_decay (float): Weight decay (L2 penalty). Should be equal to or greater than 0. Default: 0.0. diff --git a/mindspore/nn/optim/sgd.py b/mindspore/nn/optim/sgd.py index bf9b8f559e..d0bc8d5627 100755 --- a/mindspore/nn/optim/sgd.py +++ b/mindspore/nn/optim/sgd.py @@ -63,10 +63,10 @@ class SGD(Optimizer): Args: params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated, - the element in `params` should be class `Parameter`. When the `params` is a list of `dict`, the "params", + the element in `params` must be class `Parameter`. When the `params` is a list of `dict`, the "params", "lr", "weight_decay" and "order_params" are the keys can be parsed. - - params: Required. The value should be a list of `Parameter`. + - params: Required. The value must be a list of `Parameter`. - lr: Optional. If "lr" in the keys, the value of corresponding learning rate will be used. If not, the `learning_rate` in the API will be used. @@ -74,24 +74,24 @@ class SGD(Optimizer): - weight_decay: Optional. If "weight_decay" in the keys, the value of corresponding weight decay will be used. If not, the `weight_decay` in the API will be used. - - order_params: Optional. If "order_params" in the keys, the value should be the order of parameters and + - order_params: Optional. If "order_params" in the keys, the value must be the order of parameters and the order will be followed in optimizer. There are no other keys in the `dict` and the parameters which - in the value of 'order_params' should be in one of group parameters. + in the value of 'order_params' must be in one of group parameters. learning_rate (Union[float, Tensor, Iterable, LearningRateSchedule]): A value or a graph for the learning rate. When the learning_rate is an Iterable or a Tensor in a 1D dimension, use dynamic learning rate, then the i-th step will take the i-th value as the learning rate. When the learning_rate is LearningRateSchedule, use dynamic learning rate, the i-th learning rate will be calculated during the process of training according to the formula of LearningRateSchedule. When the learning_rate is a float or a Tensor in a zero - dimension, use fixed learning rate. Other cases are not supported. The float learning rate should be + dimension, use fixed learning rate. Other cases are not supported. The float learning rate must be equal to or greater than 0. If the type of `learning_rate` is int, it will be converted to float. Default: 0.1. - momentum (float): A floating point value the momentum. should be at least 0.0. Default: 0.0. - dampening (float): A floating point value of dampening for momentum. should be at least 0.0. Default: 0.0. - weight_decay (float): Weight decay (L2 penalty). It should be equal to or greater than 0. 
Default: 0.0. + momentum (float): A floating point value the momentum. must be at least 0.0. Default: 0.0. + dampening (float): A floating point value of dampening for momentum. must be at least 0.0. Default: 0.0. + weight_decay (float): Weight decay (L2 penalty). It must be equal to or greater than 0. Default: 0.0. nesterov (bool): Enables the Nesterov momentum. If use nesterov, momentum must be positive, and dampening must equal to 0.0. Default: False. - loss_scale (float): A floating point value for the loss scale, which should be larger + loss_scale (float): A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0. Inputs: diff --git a/mindspore/nn/probability/bijector/bijector.py b/mindspore/nn/probability/bijector/bijector.py index 594671fd29..72b30e0d6c 100644 --- a/mindspore/nn/probability/bijector/bijector.py +++ b/mindspore/nn/probability/bijector/bijector.py @@ -121,7 +121,7 @@ class Bijector(Cell): This __call__ may go into two directions: If args[0] is a distribution instance, the call will generate a new distribution derived from the input distribution. - Otherwise, input[0] should be the name of a Bijector function, e.g. "forward", then this call will + Otherwise, input[0] must be the name of a Bijector function, e.g. "forward", then this call will go in the construct and invoke the correstpoding Bijector function. Args: diff --git a/mindspore/nn/probability/bnn_layers/conv_variational.py b/mindspore/nn/probability/bnn_layers/conv_variational.py index 3f5ec10ec5..f1ca42f733 100644 --- a/mindspore/nn/probability/bnn_layers/conv_variational.py +++ b/mindspore/nn/probability/bnn_layers/conv_variational.py @@ -211,7 +211,7 @@ class ConvReparam(_ConvVariational): - pad: Implicit paddings on both sides of the input. The number of `padding` will be padded to the input Tensor borders. - `padding` should be greater than or equal to 0. + `padding` must be greater than or equal to 0. padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. Default: 0. @@ -219,27 +219,27 @@ class ConvReparam(_ConvVariational): of 2 integers. This parameter specifies the dilation rate of the dilated convolution. If set to be :math:`k > 1`, there will be :math:`k - 1` pixels skipped for each sampling - location. Its value should be greater or equal to 1 and bounded + location. Its value must be greater or equal to 1 and bounded by the height and width of the input. Default: 1. - group (int): Split filter into groups, `in_ channels` and - `out_channels` should be divisible by the number of groups. + group (int): Splits filter into groups, `in_ channels` and + `out_channels` must be divisible by the number of groups. Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. weight_prior_fn: The prior distribution for weight. - It should return a mindspore distribution instance. + It must return a mindspore distribution instance. Default: NormalPrior. (which creates an instance of standard normal distribution). The current version only supports normal distribution. weight_posterior_fn: The posterior distribution for sampling weight. - It should be a function handle which returns a mindspore + It must be a function handle which returns a mindspore distribution instance. Default: lambda name, shape: NormalPosterior(name=name, shape=shape). The current version only supports normal distribution. - bias_prior_fn: The prior distribution for bias vector. It should return + bias_prior_fn: The prior distribution for bias vector. 
It must return a mindspore distribution. Default: NormalPrior(which creates an instance of standard normal distribution). The current version only supports normal distribution. bias_posterior_fn: The posterior distribution for sampling bias vector. - It should be a function handle which returns a mindspore + It must be a function handle which returns a mindspore distribution instance. Default: lambda name, shape: NormalPosterior(name=name, shape=shape). The current version only supports normal distribution. diff --git a/mindspore/nn/probability/bnn_layers/dense_variational.py b/mindspore/nn/probability/bnn_layers/dense_variational.py index af2fba7b97..20e9be8d53 100644 --- a/mindspore/nn/probability/bnn_layers/dense_variational.py +++ b/mindspore/nn/probability/bnn_layers/dense_variational.py @@ -166,19 +166,19 @@ class DenseReparam(_DenseVariational): can be a string (eg. 'relu') or a Cell (eg. nn.ReLU()). Note that if the type of activation is Cell, it must be instantiated beforehand. Default: None. weight_prior_fn: The prior distribution for weight. - It should return a mindspore distribution instance. + It must return a mindspore distribution instance. Default: NormalPrior. (which creates an instance of standard normal distribution). The current version only supports normal distribution. weight_posterior_fn: The posterior distribution for sampling weight. - It should be a function handle which returns a mindspore + It must be a function handle which returns a mindspore distribution instance. Default: lambda name, shape: NormalPosterior(name=name, shape=shape). The current version only supports normal distribution. - bias_prior_fn: The prior distribution for bias vector. It should return + bias_prior_fn: The prior distribution for bias vector. It must return a mindspore distribution. Default: NormalPrior(which creates an instance of standard normal distribution). The current version only supports normal distribution. bias_posterior_fn: The posterior distribution for sampling bias vector. - It should be a function handle which returns a mindspore + It must be a function handle which returns a mindspore distribution instance. Default: lambda name, shape: NormalPosterior(name=name, shape=shape). The current version only supports normal distribution. diff --git a/mindspore/nn/probability/distribution/bernoulli.py b/mindspore/nn/probability/distribution/bernoulli.py index 6543f41aae..51ab3d7df3 100644 --- a/mindspore/nn/probability/distribution/bernoulli.py +++ b/mindspore/nn/probability/distribution/bernoulli.py @@ -32,7 +32,7 @@ class Bernoulli(Distribution): name (str): The name of the distribution. Default: 'Bernoulli'. Note: - `probs` should be a proper probability (0 < p < 1). + `probs` must be a proper probability (0 < p < 1). `dist_spec_args` is `probs`. 
Examples: diff --git a/mindspore/nn/probability/distribution/categorical.py b/mindspore/nn/probability/distribution/categorical.py index a8492ad52a..feb1778da9 100644 --- a/mindspore/nn/probability/distribution/categorical.py +++ b/mindspore/nn/probability/distribution/categorical.py @@ -50,7 +50,7 @@ class Categorical(Distribution): >>> >>> # Similar calls can be made to logits >>> ans = self.ca.probs - >>> # value should be Tensor(mstype.float32, bool, mstype.int32) + >>> # value must be Tensor(mstype.float32, bool, mstype.int32) >>> ans = self.ca.log_prob(value) >>> >>> # Usage of enumerate_support diff --git a/mindspore/nn/probability/distribution/distribution.py b/mindspore/nn/probability/distribution/distribution.py index 835271dedc..eb2719f15d 100644 --- a/mindspore/nn/probability/distribution/distribution.py +++ b/mindspore/nn/probability/distribution/distribution.py @@ -34,9 +34,9 @@ class Distribution(Cell): param (dict): The parameters used to initialize the distribution. Note: - Derived class should override operations such as `_mean`, `_prob`, + Derived class must override operations such as `_mean`, `_prob`, and `_log_prob`. Required arguments, such as `value` for `_prob`, - should be passed in through `args` or `kwargs`. `dist_spec_args` which specifies + must be passed in through `args` or `kwargs`. `dist_spec_args` which specifies a new distribution are optional. `dist_spec_args` is unique for each type of distribution. For example, `mean` and `sd` diff --git a/mindspore/nn/probability/distribution/exponential.py b/mindspore/nn/probability/distribution/exponential.py index b58b0a2288..8b2f3aa83e 100644 --- a/mindspore/nn/probability/distribution/exponential.py +++ b/mindspore/nn/probability/distribution/exponential.py @@ -33,9 +33,9 @@ class Exponential(Distribution): name (str): The name of the distribution. Default: 'Exponential'. Note: - `rate` should be strictly greater than 0. + `rate` must be strictly greater than 0. `dist_spec_args` is `rate`. - `dtype` should be a float type because Exponential distributions are continuous. + `dtype` must be a float type because Exponential distributions are continuous. Examples: >>> # To initialize an Exponential distribution of the rate 0.5. @@ -216,7 +216,7 @@ class Exponential(Distribution): rate (Tensor): The rate of the distribution. Default: self.rate. Note: - `value` should be greater or equal to zero. + `value` must be greater or equal to zero. .. math:: log_pdf(x) = \log(rate) - rate * x if x >= 0 else 0 @@ -239,7 +239,7 @@ class Exponential(Distribution): rate (Tensor): The rate of the distribution. Default: self.rate. Note: - `value` should be greater or equal to zero. + `value` must be greater or equal to zero. .. math:: cdf(x) = 1.0 - \exp(-1 * \lambda * x) if x >= 0 else 0 @@ -261,7 +261,7 @@ class Exponential(Distribution): rate (Tensor): The rate of the distribution. Default: self.rate. Note: - `value` should be greater or equal to zero. + `value` must be greater or equal to zero. .. math:: log_survival_function(x) = -1 * \lambda * x if x >= 0 else 0 diff --git a/mindspore/nn/probability/distribution/geometric.py b/mindspore/nn/probability/distribution/geometric.py index e2d6255ede..4087949c15 100644 --- a/mindspore/nn/probability/distribution/geometric.py +++ b/mindspore/nn/probability/distribution/geometric.py @@ -36,7 +36,7 @@ class Geometric(Distribution): name (str): The name of the distribution. Default: 'Geometric'. Note: - `probs` should be a proper probability (0 < p < 1). 
+ `probs` must be a proper probability (0 < p < 1). `dist_spec_args` is `probs`. Examples: diff --git a/mindspore/nn/probability/distribution/normal.py b/mindspore/nn/probability/distribution/normal.py index aef0a01b51..7d4da39b6a 100644 --- a/mindspore/nn/probability/distribution/normal.py +++ b/mindspore/nn/probability/distribution/normal.py @@ -35,9 +35,9 @@ class Normal(Distribution): name (str): The name of the distribution. Default: 'Normal'. Note: - `sd` should be greater than zero. + `sd` must be greater than zero. `dist_spec_args` are `mean` and `sd`. - `dtype` should be a float type because Normal distributions are continuous. + `dtype` must be a float type because Normal distributions are continuous. Examples: >>> # To initialize a Normal distribution of the mean 3.0 and the standard deviation 4.0. diff --git a/mindspore/nn/probability/distribution/uniform.py b/mindspore/nn/probability/distribution/uniform.py index ce49bfa6cd..6668161cd5 100644 --- a/mindspore/nn/probability/distribution/uniform.py +++ b/mindspore/nn/probability/distribution/uniform.py @@ -34,9 +34,9 @@ class Uniform(Distribution): name (str): The name of the distribution. Default: 'Uniform'. Note: - `low` should be stricly less than `high`. + `low` must be stricly less than `high`. `dist_spec_args` are `high` and `low`. - `dtype` should be float type because Uniform distributions are continuous. + `dtype` must be float type because Uniform distributions are continuous. Examples: >>> # To initialize a Uniform distribution of the lower bound 0.0 and the higher bound 1.0. diff --git a/mindspore/nn/probability/dpn/vae/cvae.py b/mindspore/nn/probability/dpn/vae/cvae.py index ee44326c0c..01577a4677 100644 --- a/mindspore/nn/probability/dpn/vae/cvae.py +++ b/mindspore/nn/probability/dpn/vae/cvae.py @@ -31,8 +31,8 @@ class ConditionalVAE(Cell): Note: When encoder and decoder ard defined, the shape of the encoder's output tensor and decoder's input tensor - should be :math:`(N, hidden\_size)`. - The latent_size should be less than or equal to the hidden_size. + must be :math:`(N, hidden\_size)`. + The latent_size must be less than or equal to the hidden_size. Args: encoder(Cell): The Deep Neural Network (DNN) model defined as encoder. @@ -103,7 +103,7 @@ class ConditionalVAE(Cell): Args: sample_y (Tensor): Define the label of samples. Tensor of shape (generate_nums, ) and type mindspore.int32. generate_nums (int): The number of samples to generate. - shape(tuple): The shape of sample, which should be the format of (generate_nums, C, H, W) or (-1, C, H, W). + shape(tuple): The shape of sample, which must be the format of (generate_nums, C, H, W) or (-1, C, H, W). Returns: Tensor, the generated samples. diff --git a/mindspore/nn/probability/dpn/vae/vae.py b/mindspore/nn/probability/dpn/vae/vae.py index 8fb7255657..c0aff3b567 100644 --- a/mindspore/nn/probability/dpn/vae/vae.py +++ b/mindspore/nn/probability/dpn/vae/vae.py @@ -29,8 +29,8 @@ class VAE(Cell): Note: When the encoder and decoder are defined, the shape of the encoder's output tensor and decoder's input tensor - should be :math:`(N, hidden\_size)`. - The latent_size should be less than or equal to the hidden_size. + must be :math:`(N, hidden\_size)`. + The latent_size must be less than or equal to the hidden_size. Args: encoder(Cell): The Deep Neural Network (DNN) model defined as encoder. @@ -89,7 +89,7 @@ class VAE(Cell): Args: generate_nums (int): The number of samples to generate. 
- shape(tuple): The shape of sample, it should be (generate_nums, C, H, W) or (-1, C, H, W). + shape(tuple): The shape of sample, it must be (generate_nums, C, H, W) or (-1, C, H, W). Returns: Tensor, the generated samples. diff --git a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py index 67ef74b28a..c802972a27 100644 --- a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py +++ b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py @@ -48,8 +48,8 @@ class UncertaintyEvaluation: epochs (int): Total number of iterations on the data. Default: 1. epi_uncer_model_path (str): The save or read path of the epistemic uncertainty model. Default: None. ale_uncer_model_path (str): The save or read path of the aleatoric uncertainty model. Default: None. - save_model (bool): Whether to save the uncertainty model or not, if True, the epi_uncer_model_path - and ale_uncer_model_path should not be None. If False, the model to evaluate will be loaded from + save_model (bool): Whether to save the uncertainty model or not, if true, the epi_uncer_model_path + and ale_uncer_model_path must not be None. If false, the model to evaluate will be loaded from the the path of the uncertainty model; if the path is not given , it will not save or load the uncertainty model. Default: False. @@ -192,7 +192,7 @@ class UncertaintyEvaluation: Evaluate the epistemic uncertainty of inference results, which also called model uncertainty. Args: - eval_data (Tensor): The data samples to be evaluated, the shape should be (N,C,H,W). + eval_data (Tensor): The data samples to be evaluated, the shape must be (N,C,H,W). Returns: numpy.dtype, the epistemic uncertainty of inference results of data samples. @@ -205,7 +205,7 @@ class UncertaintyEvaluation: Evaluate the aleatoric uncertainty of inference results, which also called data uncertainty. Args: - eval_data (Tensor): The data samples to be evaluated, the shape should be (N,C,H,W). + eval_data (Tensor): The data samples to be evaluated, the shape must be (N,C,H,W). Returns: numpy.dtype, the aleatoric uncertainty of inference results of data samples. @@ -258,7 +258,7 @@ class EpistemicUncertaintyModel(Cell): class AleatoricUncertaintyModel(Cell): """ The aleatoric uncertainty (also called data uncertainty) is caused by input data, to obtain this - uncertainty, the loss function should be modified in order to add variance into loss. + uncertainty, the loss function must be modified in order to add variance into loss. See more details in `What Uncertainties Do We Need in Bayesian Deep Learning for Computer Vision? `_. diff --git a/mindspore/nn/probability/transforms/transform_bnn.py b/mindspore/nn/probability/transforms/transform_bnn.py index 1dc89b59e7..9d23af8034 100644 --- a/mindspore/nn/probability/transforms/transform_bnn.py +++ b/mindspore/nn/probability/transforms/transform_bnn.py @@ -99,9 +99,9 @@ class TransformToBNN: {"in_channels": dp.in_channels, "out_channels": dp.out_channels, "pad_mode": dp.pad_mode, "kernel_size": dp.kernel_size, "stride": dp.stride, "has_bias": dp.has_bias}. add_dense_args (dict): The new arguments added to BNN full connection layer. Note that the arguments in - `add_dense_args` should not duplicate arguments in `get_dense_args`. Default: None. + `add_dense_args` must not duplicate arguments in `get_dense_args`. Default: None. add_conv_args (dict): The new arguments added to BNN convolutional layer. 
Note that the arguments in - `add_conv_args` should not duplicate arguments in `get_conv_args`. Default: None. + `add_conv_args` must not duplicate arguments in `get_conv_args`. Default: None. Returns: Cell, a trainable BNN model wrapped by TrainOneStepCell. @@ -143,7 +143,7 @@ class TransformToBNN: bnn_layer_type (Cell): The type of BNN layer to be transformed to. The optional values are DenseReparam and ConvReparam. get_args: The arguments gotten from the DNN layer. Default: None. - add_args (dict): The new arguments added to BNN layer. Note that the arguments in `add_args` should not + add_args (dict): The new arguments added to BNN layer. Note that the arguments in `add_args` must not duplicate arguments in `get_args`. Default: None. Returns: diff --git a/mindspore/nn/wrap/cell_wrapper.py b/mindspore/nn/wrap/cell_wrapper.py index c1bb9429ef..f283dd834d 100644 --- a/mindspore/nn/wrap/cell_wrapper.py +++ b/mindspore/nn/wrap/cell_wrapper.py @@ -91,7 +91,7 @@ class WithGradCell(Cell): network (Cell): The target network to wrap. The network only supports single output. loss_fn (Cell): Primitive loss function used to compute gradients. Default: None. sens (Union[None, Tensor, Scalar, Tuple ...]): The sensitive for backpropagation, the type and shape - should be same as the `network` output. If None, we will fill one to a same type shape of + must be same as the `network` output. If None, we will fill one to a same type shape of output value. Default: None. Inputs: diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py index 47f502e780..ea47b6ca73 100644 --- a/mindspore/nn/wrap/grad_reducer.py +++ b/mindspore/nn/wrap/grad_reducer.py @@ -331,7 +331,7 @@ class DistributedGradReducer(Cell): def construct(self, grads): """ Under certain circumstances, the data precision of grads could be mixed with float16 and float32. Thus, the - result of AllReduce is unreliable. To solve the problem, grads should be cast to float32 before AllReduce, + result of AllReduce is unreliable. To solve the problem, grads must be cast to float32 before AllReduce, and cast back after the operation. Args: diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 22c3114fb2..9389c021f9 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -64,7 +64,7 @@ class DynamicLossScaleUpdateCell(Cell): executed on host). Args: - loss_scale_value (float): Init loss scale. + loss_scale_value (float): Initializes loss scale. scale_factor (int): Coefficient of increase and decrease. scale_window (int): Maximum continuous training steps that do not have overflow. @@ -139,7 +139,7 @@ class FixedLossScaleUpdateCell(Cell): For usage, refer to `DynamicLossScaleUpdateCell`. Args: - loss_scale_value (float): Init loss scale. + loss_scale_value (float): Initializes loss scale. Examples: >>> net_with_loss = Net() @@ -173,8 +173,8 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): Cell as args. The loss scale value can be updated in both host side or device side. The TrainOneStepWithLossScaleCell will be compiled to be graph which takes `*inputs` as input data. The Tensor type of `scale_sense` is acting as loss scaling value. If you want to update it on host side, - the value should be provided. If the Tensor type of `scale_sense` is not given, the loss scale update logic - should be provied by Cell type of `scale_sense`. + the value must be provided. 
If the Tensor type of `scale_sense` is not given, the loss scale update logic + must be provied by Cell type of `scale_sense`. Args: network (Cell): The training network. The network only supports single output. diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index b408419c29..a71a283cfb 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -27,7 +27,7 @@ class AbsGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init AbsGrad""" + """Initialize AbsGrad""" def infer_shape(self, y, dy): return y @@ -46,7 +46,7 @@ class ACosGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ACosGrad""" + """Initialize ACosGrad""" def infer_shape(self, x, dout): validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name) @@ -63,7 +63,7 @@ class AcoshGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init AcoshGrad""" + """Initialize AcoshGrad""" def infer_shape(self, x, dout): validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name) @@ -85,7 +85,7 @@ class AsinGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init AsinGrad""" + """Initialize AsinGrad""" def infer_shape(self, x, dout): validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name) @@ -102,7 +102,7 @@ class AsinhGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init AsinhGrad""" + """Initialize AsinhGrad""" def infer_shape(self, x, dout): validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name) @@ -119,7 +119,7 @@ class ReciprocalGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ReciprocalGrad""" + """Initialize ReciprocalGrad""" def infer_shape(self, x_shape, dout_shape): validator.check("x shape", x_shape, "dout shape", dout_shape, Rel.EQ, self.name) @@ -136,7 +136,7 @@ class RsqrtGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init RsqrtGrad""" + """Initialize RsqrtGrad""" def infer_shape(self, x_shape, dout_shape): validator.check("x shape", x_shape, "dout shape", dout_shape, Rel.EQ, self.name) @@ -153,7 +153,7 @@ class SoftmaxGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init SoftmaxGrad""" + """Initialize SoftmaxGrad""" def infer_shape(self, x_shape, dout_shape): validator.check("x shape", x_shape, "dout shape", dout_shape, Rel.EQ, self.name) @@ -170,7 +170,7 @@ class SqrtGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init SqrtGrad""" + """Initialize SqrtGrad""" def infer_shape(self, x_shape, dout_shape): validator.check("x shape", x_shape, "dout shape", dout_shape, Rel.EQ, self.name) @@ -254,7 +254,7 @@ class ConcatOffset(PrimitiveWithInfer): @prim_attr_register def __init__(self, N=2, axis=0): - """init ConcatOffset""" + """Initialize ConcatOffset""" def __infer__(self, input_x): axis = self.axis @@ -307,7 +307,7 @@ class Conv2DBackpropFilter(PrimitiveWithInfer): stride=(1, 1), dilation=(1, 1, 1, 1), group=1): - """init Convolution""" + """Initialize Convolution""" self.init_prim_io_names(inputs=['out_backprop', 'input', 'filter_sizes'], outputs=['output']) self.out_channel = out_channel self.kernel_size = kernel_size @@ -373,7 +373,7 @@ class DepthwiseConv2dNativeBackpropFilter(PrimitiveWithInfer): stride=1, dilation=1, group=1): - """init Convolution""" + """Initialize Convolution""" self.init_prim_io_names(inputs=['input', 'filter_size', 'dout'], outputs=['output']) self.channel_multiplier = 
channel_multiplier self.kernel_size = kernel_size @@ -434,7 +434,7 @@ class DepthwiseConv2dNativeBackpropInput(PrimitiveWithInfer): stride=1, dilation=1, group=1): - """init Convolution""" + """Initialize Convolution""" self.init_prim_io_names(inputs=['input_size', 'filter', 'dout'], outputs=['output']) self.channel_multiplier = channel_multiplier self.kernel_size = kernel_size @@ -588,7 +588,7 @@ class GeluGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init GeluGrad""" + """Initialize GeluGrad""" def infer_shape(self, y_backprop_shape, x_shape, y_shape): return x_shape @@ -726,12 +726,12 @@ class MaxPoolGradGrad(_PoolGrad): will be returned without padding. Extra pixels will be discarded. Inputs: - - **origin_input** (Tensor) - Tensor with data format "NCHW", data type should be float16. + - **origin_input** (Tensor) - Tensor with data format "NCHW", data type must be float16. - **origin_output** (Tensor) - Data type same as `origin_input`. - **grad** (Tensor) - Data type same as `origin_input`. Outputs: - Tensor, With data type same as `origin_input`. + Tensor, with data type same as `origin_input`. """ @@ -753,7 +753,7 @@ class MaximumGrad(Primitive): @prim_attr_register def __init__(self, grad_x=True, grad_y=True): - """Init MaximumGrad""" + """Initialize MaximumGrad""" def __call__(self, x, y, dout): raise NotImplementedError @@ -799,12 +799,12 @@ class MaxPoolGradGradWithArgmax(_PoolGrad): will be returned without padding. Extra pixels will be discarded. Inputs: - - **x** (Tensor) - Tensor with data format "NCHW", data type should be float16. + - **x** (Tensor) - Tensor with data format "NCHW", data type must be float16. - **grad** (Tensor) - Data type same as `x`. - - **argmax** (Tensor) - Data type should be uint16 or int64. + - **argmax** (Tensor) - Data type must be uint16 or int64. Outputs: - Tensor, With data type same as `x`. + Tensor, with data type same as `x`. """ @@ -829,7 +829,7 @@ class MinimumGrad(Primitive): @prim_attr_register def __init__(self, grad_x=True, grad_y=True): - """Init MinimumGrad""" + """Initialize MinimumGrad""" def __call__(self, x, y, dout): raise NotImplementedError @@ -844,8 +844,8 @@ class L2NormalizeGrad(PrimitiveWithInfer): epsilon (float): A small value added for numerical stability. Default: 1e-4. Inputs: - - **input_x** (Tensor) - Should be the input `weight` of forward operator L2Normalize. - - **out** (Tensor) - Should be the output of forward operator L2Normalize. + - **input_x** (Tensor) - Must be the input `weight` of forward operator L2Normalize. + - **out** (Tensor) - Must be the output of forward operator L2Normalize. - **dout** (Tensor) - The backprop of the next layer. Outputs: @@ -897,7 +897,7 @@ class LogSoftmaxGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=-1): - """init LogSoftmaxGrad""" + """Initialize LogSoftmaxGrad""" validator.check_value_type("axis", axis, [int], self.name) def infer_shape(self, dout, logits): @@ -1106,8 +1106,8 @@ class PReLUGrad(PrimitiveWithInfer): Inputs: - **y_backprop** (Tensor) - Representing the backprop of the next layer. - - **input_x** (Tensor) - Should be the input `input_x` of forward operator PRelu. - - **weight** (Tensor) - Float Tensor, w > 0, should be the input `weight` of forward operator PRelu. + - **input_x** (Tensor) - Must be the input `input_x` of forward operator PRelu. + - **weight** (Tensor) - Float Tensor, w > 0, must be the input `weight` of forward operator PRelu. Outputs: Tensor, with the same type as `input_x`. 
@@ -1135,7 +1135,7 @@ class ReluGrad(Primitive): @prim_attr_register def __init__(self): - """init ReluGrad""" + """Initialize ReluGrad""" self.init_prim_io_names(inputs=['y_backprop', 'x'], outputs=['output']) def __call__(self, y_backprop, x): @@ -1185,7 +1185,7 @@ class EluGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init EluGrad""" + """Initialize EluGrad""" def infer_shape(self, y_grad_shape, x_shape): return x_shape @@ -1224,7 +1224,7 @@ class ResizeNearestNeighborGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self, align_corners=False): - """Init ResizeNearestNeighborGrad""" + """Initialize ResizeNearestNeighborGrad""" self.init_prim_io_names(inputs=['grads', 'size'], outputs=['y']) def __infer__(self, grads, size): @@ -1247,7 +1247,7 @@ class ROIAlignGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self, xdiff_shape, pooled_height, pooled_width, spatial_scale, sample_num=2): - """init ROIAlignGrad""" + """Initialize ROIAlignGrad""" validator.check_value_type("pooled_height", pooled_height, [int], self.name) validator.check_value_type("pooled_width", pooled_width, [int], self.name) validator.check_value_type("spatial_scale", spatial_scale, [float], self.name) @@ -1319,7 +1319,7 @@ class SigmoidCrossEntropyWithLogitsGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init SigmoidCrossEntropyWithLogitsGrad""" + """Initialize SigmoidCrossEntropyWithLogitsGrad""" self.init_prim_io_names(inputs=['x', 'y', 'dout'], outputs=['x_grad']) def infer_shape(self, x_shape, y_shape, dout_shape): @@ -1338,7 +1338,7 @@ class SliceGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init SliceGrad""" + """Initialize SliceGrad""" self.init_prim_io_names(inputs=['dy', 'x', 'begin', 'size'], outputs=['dx']) def __infer__(self, dy, x, begin, size): @@ -1392,7 +1392,7 @@ class StridedSliceGrad(PrimitiveWithInfer): ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0): - """init StrideSliceGrad""" + """Initialize StrideSliceGrad""" validator.check_value_type('begin_mask', begin_mask, [int], self.name) validator.check_value_type('end_mask', end_mask, [int], self.name) validator.check_value_type('ellipsis_mask', ellipsis_mask, [int], self.name) @@ -1440,7 +1440,7 @@ class StridedSliceGradAICPU(PrimitiveWithInfer): ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0): - """init StrideSliceGrad""" + """Initialize StrideSliceGrad""" validator.check_value_type('begin_mask', begin_mask, [int], self.name) validator.check_value_type('end_mask', end_mask, [int], self.name) validator.check_value_type('ellipsis_mask', ellipsis_mask, [int], self.name) @@ -1504,7 +1504,7 @@ class MirrorPadGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self, mode="REFLECT"): - """init MirrorPad""" + """Initialize MirrorPad""" validator.check_string('mode', mode, ['REFLECT', 'SYMMETRIC'], self.name) self.mode = mode @@ -1528,7 +1528,7 @@ class MirrorPadGrad(PrimitiveWithInfer): class EmbeddingLookupCommGrad(PrimitiveWithInfer): """ - Perform the gradient for the communication part of EmbeddingLookup operator. + Performs the gradient for the communication part of EmbeddingLookup operator. This works ONLY when 'reduce_scatter_flag' is True in 'EmbeddingLookup'. Roughly speaking, this primitive is implemented by StridedSlice --> _HostAllGather --> Concat. This primitive runs on host. 
@@ -1542,7 +1542,7 @@ class EmbeddingLookupCommGrad(PrimitiveWithInfer): def __infer__(self, dy, split_num): """ This primitive is implemented by three steps: - 1) Split the 'dy' along dimension 0 into 'split_num' parts. + 1) Splits the 'dy' along dimension 0 into 'split_num' parts. 2) For each part, perform _HostAllGather((0, 1, 2, 3, 4, 5, 6, 7)) on the host. 3) After _HostAllGather, there are still 'split_num' parts in each process. Then, perform Concat on them along dimension 0. @@ -1600,7 +1600,7 @@ class AtanGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init AtanGrad""" + """Initialize AtanGrad""" def infer_shape(self, x, dout): validator.check("x shape", x, "dout shape", dout, Rel.EQ, self.name) diff --git a/mindspore/ops/operations/_inner_ops.py b/mindspore/ops/operations/_inner_ops.py index a6d641ac5e..9d240beff4 100644 --- a/mindspore/ops/operations/_inner_ops.py +++ b/mindspore/ops/operations/_inner_ops.py @@ -34,7 +34,7 @@ class StridedSliceAICPU(PrimitiveWithInfer): Note: The stride may be negative value, which causes reverse slicing. - The shape of `begin`, `end` and `strides` should be the same. + The shape of `begin`, `end` and `strides` must be the same. Args: begin_mask (int): Starting index of the slice. Default: 0. @@ -85,7 +85,7 @@ class StridedSliceAICPU(PrimitiveWithInfer): ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0): - """init StrideSlice""" + """Initialize StrideSlice""" self.init_prim_io_names(inputs=['x', 'begin', 'end', 'strides'], outputs=['output']) validator.check_value_type('begin_mask', begin_mask, [int], self.name) validator.check_value_type('end_mask', end_mask, [int], self.name) @@ -155,16 +155,16 @@ class StridedSliceAICPU(PrimitiveWithInfer): class ExtractImagePatches(PrimitiveWithInfer): """ - Extract patches from images. + Extracts patches from images. The input tensor must be a 4-D tensor and the data format is NHWC. Args: - ksizes (Union[tuple[int], list[int]]): The size of sliding window, should be a tuple or a list of integers, + ksizes (Union[tuple[int], list[int]]): The size of sliding window, must be a tuple or a list of integers, and the format is [1, ksize_row, ksize_col, 1]. strides (Union[tuple[int], list[int]]): Distance between the centers of the two consecutive patches, - should be a tuple or list of int, and the format is [1, stride_row, stride_col, 1]. + must be a tuple or list of int, and the format is [1, stride_row, stride_col, 1]. rates (Union[tuple[int], list[int]]): In each extracted patch, the gap between the corresponding dimension - pixel positions, should be a tuple or a list of integers, and the format is [1, rate_row, rate_col, 1]. + pixel positions, must be a tuple or a list of integers, and the format is [1, rate_row, rate_col, 1]. padding (str): The type of padding algorithm, is a string whose value is "same" or "valid", not case sensitive. Default: "valid". @@ -311,11 +311,11 @@ class Quant(PrimitiveWithInfer): scale (float) : Specifies the scaling ratio. offset (float): Specifies the offset. sqrt_mode (bool) : Specifies whether to perform square root on `scale`. Default: False. - round_mode (str): Specifies the way to round. Should be one of ["Round", "Floor", "Ceil", "Trunc"]. + round_mode (str): Specifies the way to round. Must be one of ["Round", "Floor", "Ceil", "Trunc"]. Default: "Round". Inputs: - - **input_x** (Tensor) : Input tensor. Its data type should be mindspore.float16 or mindspore.float32. + - **input_x** (Tensor) : Input tensor. 
Its data type must be mindspore.float16 or mindspore.float32. Outputs: - Tensor: The quantized output tensor of type mindspore.int8. @@ -367,9 +367,9 @@ class Dequant(PrimitiveWithInfer): relu_flag (bool): Specifies whether to perform ReLU. Default: False. Inputs: - - **input_x** (Tensor) : Input tensor. Should be mindspore.int32. + - **input_x** (Tensor) : Input tensor. Must be mindspore.int32. - **deq_scale** (Tensor) : Specifies the scaling ratio. - Data type should be mindspore.float16 or mindspore.uint64 + Data type must be mindspore.float16 or mindspore.uint64 Outputs: - Tensor: The quantized output tensor of type mindspore.float16. @@ -463,7 +463,7 @@ class MatrixDiag(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init MatrixDiag""" + """Initialize MatrixDiag""" def infer_dtype(self, x_dtype, assist_dtype): valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8] @@ -499,7 +499,7 @@ class MatrixDiagPart(PrimitiveWithInfer): - **assist** (Tensor) - A eye tensor of the same type as `x`. With shape same as `x`. Outputs: - Tensor, data type same as input `x`. The shape should be x.shape[:-2] + [min(x.shape[-2:])]. + Tensor, data type same as input `x`. The shape must be x.shape[:-2] + [min(x.shape[-2:])]. Examples: >>> x = Tensor([[[-1, 0], [0, 1]], [[-1, 0], [0, 1]], [[-1, 0], [0, 1]]], mindspore.float32) @@ -511,7 +511,7 @@ class MatrixDiagPart(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init MatrixDiagPart""" + """Initialize MatrixDiagPart""" def infer_dtype(self, x_dtype, assist_dtype): valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8] @@ -532,7 +532,7 @@ class MatrixDiagPart(PrimitiveWithInfer): class MatrixSetDiag(PrimitiveWithInfer): r""" - Modify the batched diagonal part of a batched tensor. + Modifies the batched diagonal part of a batched tensor. Inputs: - **x** (Tensor) - The batched tensor. It can be one of the following data types: @@ -554,7 +554,7 @@ class MatrixSetDiag(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init MatrixSetDiag""" + """Initialize MatrixSetDiag""" def infer_dtype(self, x_dtype, diagonal_dtype, assist_dtype): valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8] diff --git a/mindspore/ops/operations/_quant_ops.py b/mindspore/ops/operations/_quant_ops.py index 21752c461d..5b07ce460a 100644 --- a/mindspore/ops/operations/_quant_ops.py +++ b/mindspore/ops/operations/_quant_ops.py @@ -44,10 +44,10 @@ __all__ = ["MinMaxUpdatePerLayer", class MinMaxUpdatePerLayer(PrimitiveWithInfer): r""" - Update min and max per layer. + Updates min and max per layer. Args: - ema (bool): Use EMA algorithm update value min and max. Default: False. + ema (bool): Uses EMA algorithm update value min and max. Default: False. ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. Inputs: @@ -56,7 +56,7 @@ class MinMaxUpdatePerLayer(PrimitiveWithInfer): - **max** (Tensor) : Value of the max range of the input data x. Outputs: - - Tensor: Simulate quantize tensor of x. + - Tensor: Simulates quantize tensor of x. 
Examples: >>> input_tensor = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32) @@ -68,7 +68,7 @@ class MinMaxUpdatePerLayer(PrimitiveWithInfer): @prim_attr_register def __init__(self, ema=False, ema_decay=0.999): - """init FakeQuantMinMaxPerLayerUpdate OP""" + """Initialize FakeQuantMinMaxPerLayerUpdate OP""" if context.get_context('device_target') == "Ascend": from mindspore.ops._op_impl._custom_op import minmax_update_perlayer if ema and not ema_decay: @@ -101,10 +101,10 @@ class MinMaxUpdatePerLayer(PrimitiveWithInfer): class MinMaxUpdatePerChannel(PrimitiveWithInfer): r""" - Update min and max per channel. + Updates min and max per channel. Args: - ema (bool): Use EMA algorithm update value min and max. Default: False. + ema (bool): Uses EMA algorithm update value min and max. Default: False. ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. channel_axis (int): Quantization by channel axis. Ascend backend only supports 0 or 1. Default: 1. @@ -114,7 +114,7 @@ class MinMaxUpdatePerChannel(PrimitiveWithInfer): - **max** (Tensor) : Value of the max range of the input data x. Outputs: - - Tensor: Simulate quantize tensor of x. + - Tensor: Simulates quantize tensor of x. Examples: >>> x = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32) @@ -127,7 +127,7 @@ class MinMaxUpdatePerChannel(PrimitiveWithInfer): @prim_attr_register def __init__(self, ema=False, ema_decay=0.999, channel_axis=1): - """init FakeQuantPerChannelUpdate OP for Ascend""" + """Initialize FakeQuantPerChannelUpdate OP for Ascend""" self.is_ascend = context.get_context('device_target') == "Ascend" if self.is_ascend: from mindspore.ops._op_impl._custom_op import minmax_update_perchannel @@ -169,11 +169,11 @@ class MinMaxUpdatePerChannel(PrimitiveWithInfer): class FakeQuantPerLayer(PrimitiveWithInfer): r""" - Simulate the quantize and dequantize operations in training time. + Simulates the quantize and dequantize operations in training time. Args: num_bits (int) : Number bits for quantization aware. Default: 8. - ema (bool): Use EMA algorithm update value min and max. Default: False. + ema (bool): Uses EMA algorithm update value min and max. Default: False. ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. quant_delay (int): Quantilization delay parameter. Before delay step in training time not update simulate quantization aware funcion. After delay step in training time begin simulate the aware @@ -188,7 +188,7 @@ class FakeQuantPerLayer(PrimitiveWithInfer): - **max** (Tensor) : Value of the max range of the input data x. Outputs: - - Tensor: Simulate quantize tensor of x. + - Tensor: Simulates quantize tensor of x. Examples: >>> input_tensor = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32) @@ -207,7 +207,7 @@ class FakeQuantPerLayer(PrimitiveWithInfer): symmetric=False, narrow_range=False, training=True): - """init FakeQuantPerLayer OP""" + """Initialize FakeQuantPerLayer OP""" if context.get_context('device_target') == "Ascend": from mindspore.ops._op_impl._custom_op import fake_quant_perlayer if num_bits not in self.support_quant_bit: @@ -309,11 +309,11 @@ class FakeQuantPerLayerGrad(PrimitiveWithInfer): class FakeQuantPerChannel(PrimitiveWithInfer): r""" - Simulate the quantize and dequantize operations in training time base on per channel. + Simulates the quantize and dequantize operations in training time base on per channel. Args: num_bits (int) : Number bits to quantilization. Default: 8. - ema (bool): Use EMA algorithm update tensor min and tensor max. Default: False. 
+ ema (bool): Uses EMA algorithm update tensor min and tensor max. Default: False. ema_decay (int) : EMA algorithm decay parameter. Default: 0.999. quant_delay (int): Quantilization delay parameter. Before delay step in training time not update the weight data to simulate quantize operation. After delay step in training time @@ -351,7 +351,7 @@ class FakeQuantPerChannel(PrimitiveWithInfer): narrow_range=False, training=True, channel_axis=1): - """init FakeQuantPerChannel OP""" + """Initialize FakeQuantPerChannel OP""" self.is_ascend = context.get_context('device_target') == "Ascend" if self.is_ascend: from mindspore.ops._op_impl._custom_op import fake_quant_perchannel @@ -426,7 +426,7 @@ class FakeQuantPerChannelGrad(PrimitiveWithInfer): symmetric=False, narrow_range=False, channel_axis=1): - """init FakeQuantPerChannelGrad Fill""" + """Initialize FakeQuantPerChannelGrad Fill""" if context.get_context('device_target') == "Ascend": from mindspore.ops._op_impl._custom_op import fake_quant_perchannel_grad if num_bits not in self.support_quant_bit: @@ -468,7 +468,7 @@ class BatchNormFold(PrimitiveWithInfer): Batch normalization folded. Args: - momentum (float): Momentum value should be [0, 1]. Default: 0.9. + momentum (float): Momentum value must be [0, 1]. Default: 0.9. epsilon (float): A small float number to avoid dividing by 0. 1e-5 if dtype in float32 else 1e-3. Default: 1e-5. is_training (bool): In training mode set True, else set False. Default: True. @@ -501,7 +501,7 @@ class BatchNormFold(PrimitiveWithInfer): @prim_attr_register def __init__(self, momentum=0.9, epsilon=1e-5, is_training=True, freeze_bn=0): - """init batch norm fold layer""" + """Initialize batch norm fold layer""" self.momentum = validator.check_number_range('momentum', momentum, 0, 1, Rel.INC_BOTH, self.name) self.epsilon = validator.check_float_positive('epsilon', epsilon, self.name) self.is_training = validator.check_value_type('is_training', is_training, (bool,), self.name) @@ -543,7 +543,7 @@ class BatchNormFoldGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self, epsilon=1e-5, is_training=True, freeze_bn=0): - """init BatchNormGrad layer""" + """Initialize BatchNormGrad layer""" self.is_training = validator.check_value_type('is_training', is_training, (bool,), self.name) self.freeze_bn = validator.check_value_type('freeze_bn', freeze_bn, (int,), self.name) self.epsilon = validator.check_float_positive('epsilon', epsilon, self.name) @@ -574,7 +574,7 @@ class BatchNormFoldGrad(PrimitiveWithInfer): class CorrectionMul(PrimitiveWithInfer): """ - Scale the weights with a correction factor to the long term statistics + Scales the weights with a correction factor to the long term statistics prior to quantization. This ensures that there is no jitter in the quantized weights due to batch to batch variation. 
@@ -596,7 +596,7 @@ class CorrectionMul(PrimitiveWithInfer): @prim_attr_register def __init__(self, channel_axis=0): - """init correction mul layer""" + """Initialize correction mul layer""" if context.get_context('device_target') == "Ascend": from mindspore.ops._op_impl._custom_op import correction_mul self.channel_axis = channel_axis @@ -630,7 +630,7 @@ class CorrectionMulGrad(PrimitiveWithInfer): @prim_attr_register def __init__(self, channel_axis=0): - """init correction mul layer""" + """Initialize correction mul layer""" if context.get_context('device_target') == "Ascend": from mindspore.ops._op_impl._custom_op import correction_mul_grad self.channel_axis = channel_axis @@ -670,7 +670,7 @@ class CorrectionMulGradReduce(PrimitiveWithInfer): @prim_attr_register def __init__(self, channel_axis=0): - """init correction mul reduce layer""" + """Initialize correction mul reduce layer""" if context.get_context('device_target') == "Ascend": from mindspore.ops._op_impl._custom_op import correction_mul_grad self.channel_axis = channel_axis @@ -686,7 +686,7 @@ class CorrectionMulGradReduce(PrimitiveWithInfer): class BatchNormFold2(PrimitiveWithInfer): """ - Scale the bias with a correction factor to the long term statistics + Scales the bias with a correction factor to the long term statistics prior to quantization. This ensures that there is no jitter in the quantized bias due to batch to batch variation. @@ -720,7 +720,7 @@ class BatchNormFold2(PrimitiveWithInfer): @prim_attr_register def __init__(self, freeze_bn=0): - """init conv2d fold layer""" + """Initialize conv2d fold layer""" self.freeze_bn = validator.check_value_type('freeze_bn', freeze_bn, (int,), self.name) self.init_prim_io_names(inputs=['x', 'beta', 'gamma', 'batch_std', 'batch_mean', 'running_std', 'running_mean', 'global_step'], @@ -767,7 +767,7 @@ class BatchNormFold2Grad(PrimitiveWithInfer): @prim_attr_register def __init__(self, freeze_bn=0): - """init MulFold layer""" + """Initialize MulFold layer""" self.freeze_bn = freeze_bn self.init_prim_io_names(inputs=['dout', 'x', 'gamma', 'batch_std', 'batch_mean', @@ -811,7 +811,7 @@ class BatchNormFoldD(PrimitiveWithInfer): @prim_attr_register def __init__(self, momentum=0.9, epsilon=1e-5, is_training=True, freeze_bn=0): - """init _BatchNormFold layer""" + """Initialize _BatchNormFold layer""" from mindspore.ops._op_impl._custom_op import batchnorm_fold self.momentum = validator.check_number_range('momentum', momentum, 0, 1, Rel.INC_BOTH, self.name) self.epsilon = validator.check_float_positive('epsilon', epsilon, self.name) @@ -840,7 +840,7 @@ class BatchNormFoldGradD(PrimitiveWithInfer): @prim_attr_register def __init__(self, epsilon=1e-5, is_training=True, freeze_bn=0): - """init _BatchNormFoldGrad layer""" + """Initialize _BatchNormFoldGrad layer""" from mindspore.ops._op_impl._custom_op import batchnorm_fold_grad self.epsilon = validator.check_float_positive('epsilon', epsilon, self.name) self.is_training = validator.check_value_type('is_training', is_training, (bool,), self.name) @@ -867,7 +867,7 @@ class BatchNormFoldGradD(PrimitiveWithInfer): class BatchNormFold2_D(PrimitiveWithInfer): """ - Scale the bias with a correction factor to the long term statistics + Scales the bias with a correction factor to the long term statistics prior to quantization. This ensures that there is no jitter in the quantized bias due to batch to batch variation. 
@@ -889,7 +889,7 @@ class BatchNormFold2_D(PrimitiveWithInfer):
@prim_attr_register
def __init__(self, freeze_bn=0):
- """init conv2d fold layer"""
+ """Initialize conv2d fold layer"""
from mindspore.ops._op_impl._custom_op import batchnorm_fold2
self.init_prim_io_names(inputs=['x', 'beta', 'gamma', 'batch_std', 'batch_mean', 'running_std'],
outputs=['y'])
@@ -916,7 +916,7 @@ class BatchNormFold2GradD(PrimitiveWithInfer):
@prim_attr_register
def __init__(self, freeze_bn=False):
- """init MulFold layer"""
+ """Initialize MulFold layer"""
from mindspore.ops._op_impl._custom_op import batchnorm_fold2_grad
self.freeze_bn = freeze_bn
self.init_prim_io_names(
@@ -954,7 +954,7 @@ class BatchNormFold2GradReduce(PrimitiveWithInfer):
@prim_attr_register
def __init__(self, freeze_bn=False):
- """init MulFold layer"""
+ """Initialize MulFold layer"""
from mindspore.ops._op_impl._custom_op import batchnorm_fold2_grad_reduce
self.freeze_bn = freeze_bn
self.init_prim_io_names(inputs=['dout', 'x'],
diff --git a/mindspore/ops/operations/_thor_ops.py b/mindspore/ops/operations/_thor_ops.py
index a8f336841c..e0149a6fce 100644
--- a/mindspore/ops/operations/_thor_ops.py
+++ b/mindspore/ops/operations/_thor_ops.py
@@ -88,7 +88,7 @@ class CusBatchMatMul(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init CusBatchMatMul"""
+ """Initialize CusBatchMatMul"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.batch_matmul_impl import CusBatchMatMul
@@ -121,7 +121,7 @@ class CusCholeskyTrsm(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init CusCholeskyTrsm"""
+ """Initialize CusCholeskyTrsm"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.cholesky_trsm_impl import CusCholeskyTrsm
@@ -140,7 +140,7 @@ class CusCholeskyTrsm(PrimitiveWithInfer):
class CusFusedAbsMax1(PrimitiveWithInfer):
"""
- Compute the abs max of Tensor input.
+ Computes the abs max of Tensor input.
The rank of input tensors must be `4` or `2`.
Inputs:
@@ -157,7 +157,7 @@ class CusFusedAbsMax1(PrimitiveWithInfer):
@prim_attr_register
def __init__(self, origin_shape=[-1, -1]):
- """init CusFusedAbsMax1"""
+ """Initialize CusFusedAbsMax1"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
self.origin_shape = origin_shape
from mindspore.ops._op_impl._custom_op.fused_abs_max1_impl import CusFusedAbsMax1
@@ -176,7 +176,7 @@ class CusFusedAbsMax1(PrimitiveWithInfer):
class CusImg2Col(PrimitiveWithInfer):
"""
- Img2col the feature map and the result in reorganized in NC1HWC0.
+ Applies img2col to the feature map, and the result is reorganized in NC1HWC0.
Args:
- **strides** (listInt) - the stride of the ops.
@@ -193,7 +193,7 @@ class CusImg2Col(PrimitiveWithInfer): @prim_attr_register def __init__(self, ksizes, strides, dilates=(1, 1, 1, 1), mode="NC1HWC0"): - """init CusImg2Col""" + """Initialize CusImg2Col""" self.init_prim_io_names(inputs=['x1'], outputs=['y']) self.ksizes = ksizes self.strides = strides @@ -239,7 +239,7 @@ class CusMatMulCubeDenseLeft(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init CusMatMulCubeDenseLeft""" + """Initialize CusMatMulCubeDenseLeft""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_dense_left_impl import CusMatMulCubeDenseLeft @@ -274,7 +274,7 @@ class CusMatMulCubeFraczRightMul(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init CusMatMulCubeFraczRightMul""" + """Initialize CusMatMulCubeFraczRightMul""" self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y']) from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_right_mul_impl import CusMatMulCubeFraczRightMul @@ -292,14 +292,14 @@ class CusMatMulCube(PrimitiveWithInfer): The rank of input tensors must be `2`. Args: - transpose_a (bool): If True, `a` is transposed before multiplication. Default: False. - transpose_b (bool): If True, `b` is transposed before multiplication. Default: False. + transpose_a (bool): If true, `a` is transposed before multiplication. Default: False. + transpose_b (bool): If true, `b` is transposed before multiplication. Default: False. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`. If - `transpose_a` is True, its shape should be :math:`(N, C)` after transposing. + `transpose_a` is True, its shape must be :math:`(N, C)` after transposing. - **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C, M)`. If - `transpose_b` is True, its shape should be :math:`(C, M)` after transpose. + `transpose_b` is True, its shape must be :math:`(C, M)` after transpose. Outputs: Tensor, the shape of the output tensor is :math:`(N, M)`. 
@@ -313,7 +313,7 @@ class CusMatMulCube(PrimitiveWithInfer):
@prim_attr_register
def __init__(self, transpose_a=False, transpose_b=False):
- """init CusMatMulCube"""
+ """Initialize CusMatMulCube"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
self.transpose_a = transpose_a
self.transpose_b = transpose_b
@@ -355,7 +355,7 @@ class CusMatrixCombine(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init CusMatrixCombine"""
+ """Initialize CusMatrixCombine"""
self.init_prim_io_names(inputs=['x'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matrix_combine_impl import CusMatrixCombine
@@ -389,7 +389,7 @@ class CusTranspose02314(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init CusTranspose02314"""
+ """Initialize CusTranspose02314"""
self.init_prim_io_names(inputs=['x1'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.transpose02314_impl import CusTranspose02314
@@ -435,7 +435,7 @@ class CusMatMulCubeDenseRight(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init CusMatMulCubeDenseRight"""
+ """Initialize CusMatMulCubeDenseRight"""
self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matmul_cube_dense_right_impl import CusMatMulCubeDenseRight
@@ -470,7 +470,7 @@ class CusMatMulCubeFraczLeftCast(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init CusMatMulCubeFraczLeftCast"""
+ """Initialize CusMatMulCubeFraczLeftCast"""
self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
from mindspore.ops._op_impl._custom_op.matmul_cube_fracz_left_cast_impl import CusMatMulCubeFraczLeftCast
@@ -483,7 +483,7 @@ class CusMatMulCubeFraczLeftCast(PrimitiveWithInfer):
class Im2Col(PrimitiveWithInfer):
"""
- extract image pathes from image.
+ Extracts image patches from an image.
The rank of input_x1 must be `4`, data_format is "NCHW".
@@ -504,7 +504,7 @@ class Im2Col(PrimitiveWithInfer):
pad=0,
stride=1,
dilation=1):
- """init Im2Col"""
+ """Initialize Im2Col"""
self.init_prim_io_names(inputs=['x'], outputs=['output'])
self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
self.add_prim_attr('kernel_size', self.kernel_size)
@@ -564,7 +564,7 @@ class Im2Col(PrimitiveWithInfer):
class UpdateThorGradient(PrimitiveWithInfer):
"""
- Update Thor Gradient with Approximate Fisher info matrix(for GPU backend).
+ Updates Thor Gradient with Approximate Fisher info matrix (for GPU backend).
The rank of input_x1 must be `3`, which indicates the A matrix.
The rank of input_x2 must be `2`, which indicates the 1st-order gradient.
@@ -593,7 +593,7 @@ class UpdateThorGradient(PrimitiveWithInfer): @prim_attr_register def __init__(self, split_dim=0): - """init UpdateThorGradient""" + """Initialize UpdateThorGradient""" self.init_prim_io_names(inputs=['x1', 'x2', 'x3'], outputs=['y']) self.split_dim = split_dim self.add_prim_attr('split_dim', self.split_dim) diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index bdff1ef76a..83f839f9a5 100644 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -38,7 +38,7 @@ from ...common.tensor import Tensor class _ScatterOp(PrimitiveWithInfer): """ - Define Scatter operators + Defines Scatter operators """ __mindspore_signature__ = ( sig.make_sig('x', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T), @@ -54,7 +54,7 @@ class _ScatterOp(PrimitiveWithInfer): @prim_attr_register def __init__(self, use_locking=False): - """Init _ScatterOp""" + """Initialize _ScatterOp""" validator.check_value_type('use_locking', use_locking, [bool], self.name) self.init_prim_io_names(inputs=['x', 'indices', 'updates'], outputs=['y']) @@ -71,7 +71,7 @@ class _ScatterOp(PrimitiveWithInfer): class _ScatterNdOp(_ScatterOp): """ - Define _ScatterNd operators + Defines _ScatterNd operators """ def _check_scatter_shape(self, x_shape, indices_shape, updates_shape, prim_name): validator.check('the dimension of x', len(x_shape), @@ -121,7 +121,7 @@ class ExpandDims(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ExpandDims""" + """Initialize ExpandDims""" self.init_prim_io_names(inputs=['x', 'axis'], outputs=['output']) def __infer__(self, x, axis): @@ -166,7 +166,7 @@ class DType(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init DType""" + """Initialize DType""" def __infer__(self, x): validator.check_subclass("input_x", x['dtype'], mstype.tensor, self.name) @@ -200,7 +200,7 @@ class SameTypeShape(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Same""" + """Initialize Same""" def __call__(self, x, y): """run in PyNative mode""" @@ -241,7 +241,7 @@ class Cast(PrimitiveWithInfer): @prim_attr_register def __init__(self): # if primitive need setattr in __infer__ need add this flag - """init Cast""" + """Initialize Cast""" self.init_prim_io_names(inputs=['x', 'dst_type'], outputs=['output']) def check_elim(self, x, dtype): @@ -289,7 +289,7 @@ class Cast(PrimitiveWithInfer): class IsSubClass(PrimitiveWithInfer): """ - Check whether one type is subtraction class of another type. + Checks whether one type is subtraction class of another type. Inputs: - **sub_type** (mindspore.dtype) - The type to be checked. Only constant value is allowed. @@ -324,7 +324,7 @@ class IsSubClass(PrimitiveWithInfer): class IsInstance(PrimitiveWithInfer): """ - Check whether an object is an instance of a target type. + Checks whether an object is an instance of a target type. Inputs: - **inst** (Any Object) - The instance to be checked. Only constant value is allowed. 
@@ -388,7 +388,7 @@ class Reshape(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Reshape""" + """Initialize Reshape""" self.init_prim_io_names(inputs=['tensor', 'shape'], outputs=['output']) def __infer__(self, x, shape): @@ -451,7 +451,7 @@ class Shape(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Shape""" + """Initialize Shape""" def __infer__(self, x): validator.check_subclass("input_x", x['dtype'], mstype.tensor, self.name) @@ -479,7 +479,7 @@ class DynamicShape(Primitive): @prim_attr_register def __init__(self): - """init Shape""" + """Initialize Shape""" class Squeeze(PrimitiveWithInfer): @@ -511,7 +511,7 @@ class Squeeze(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=()): - """init Squeeze""" + """Initialize Squeeze""" self.init_prim_io_names(inputs=['x'], outputs=['output']) validator.check_value_type('axis', axis, [int, tuple], self.name) if isinstance(axis, tuple): @@ -547,7 +547,7 @@ class Transpose(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. - **input_perm** (tuple[int]) - The permutation to be converted. The input tuple is constructed by multiple - indexes. The length of `input_perm` and the shape of `input_x` should be the same. Only constant value is + indexes. The length of `input_perm` and the shape of `input_x` must be the same. Only constant value is allowed. Outputs: @@ -563,7 +563,7 @@ class Transpose(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Transpose""" + """Initialize Transpose""" self.init_prim_io_names(inputs=['x', 'perm'], outputs=['output']) def __infer__(self, x, perm): @@ -602,7 +602,7 @@ class Unique(Primitive): - **x** (Tensor) - The input tensor. Outputs: - Tuple, containing tensor objects `(y, idx)`, `y` is a tensor has the same type as `x`, `idx` is a tensor + Tuple, containing Tensor objects `(y, idx)`, `y` is a tensor has the same type as `x`, `idx` is a tensor containing indices of elements in the input coressponding to the output tensor. Examples: @@ -642,7 +642,7 @@ class GatherV2(PrimitiveWithCheck): @prim_attr_register def __init__(self): - """init index_select""" + """Initialize index_select""" self.init_prim_io_names(inputs=['params', 'indices', 'axis'], outputs=['output']) def __check__(self, params, indices, axis): @@ -680,14 +680,14 @@ class SparseGatherV2(GatherV2): class Padding(PrimitiveWithInfer): """ - Extend the last dimension of input tensor from 1 to pad_dim_size, by filling with 0. + Extends the last dimension of input tensor from 1 to pad_dim_size, by filling with 0. Args: pad_dim_size (int): The value of the last dimension of x to be extended, which must be positive. Inputs: - - **x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. The rank of x should be at least 2. - The last dimension of x should be 1. + - **x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. The rank of x must be at least 2. + The last dimension of x must be 1. Outputs: Tensor, the shape of tensor is :math:`(z_1, z_2, ..., z_N)`. 
@@ -700,7 +700,7 @@ class Padding(PrimitiveWithInfer): """ @prim_attr_register def __init__(self, pad_dim_size=8): - """init padding""" + """Initialize padding""" validator.check_value_type("pad_dim_size", pad_dim_size, [int], self.name) validator.check_integer("pad_dim_size", pad_dim_size, 0, Rel.GT, self.name) self.pad_dim_size = pad_dim_size @@ -746,7 +746,7 @@ class Split(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=0, output_num=1): - """init Split""" + """Initialize Split""" validator.check_value_type("axis", axis, [int], self.name) validator.check_value_type("output_num", output_num, [int], self.name) self.axis = axis @@ -798,7 +798,7 @@ class Rank(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Rank""" + """Initialize Rank""" def __infer__(self, x): validator.check_subclass("x", x['dtype'], mstype.tensor, self.name) @@ -832,7 +832,7 @@ class TruncatedNormal(PrimitiveWithInfer): @prim_attr_register def __init__(self, seed=0, dtype=mstype.float32): - """init TruncatedNormal""" + """Initialize TruncatedNormal""" validator.check_value_type('seed', seed, [int], self.name) validator.check_type_same({'dtype': dtype}, mstype.number_type, self.name) @@ -868,7 +868,7 @@ class Size(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Size""" + """Initialize Size""" def __infer__(self, x): size = 1 @@ -907,7 +907,7 @@ class Fill(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Fill""" + """Initialize Fill""" def __infer__(self, dtype, dims, x): validator.check_value_type("shape", dims['value'], [tuple], self.name) @@ -950,7 +950,7 @@ class OnesLike(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init OnesLike""" + """Initialize OnesLike""" def infer_shape(self, x_shape): return x_shape @@ -982,7 +982,7 @@ class ZerosLike(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init ZerosLike""" + """Initialize ZerosLike""" self.init_prim_io_names(inputs=['x'], outputs=['y']) def infer_shape(self, x_shape): @@ -995,7 +995,7 @@ class ZerosLike(PrimitiveWithInfer): class TupleToArray(PrimitiveWithInfer): """ - Convert a tuple to a tensor. + Converts a tuple to a tensor. If the type of the first number in the tuple is integer, the data type of the output tensor is int. Otherwise, the data type of the output tensor is float. 
@@ -1012,7 +1012,7 @@ class TupleToArray(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init TupleToArray""" + """Initialize TupleToArray""" def infer_value(self, x): validator.check_value_type("x", x, [tuple], self.name) @@ -1128,7 +1128,7 @@ class InvertPermutation(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init InvertPermutation""" + """Initialize InvertPermutation""" self.set_const_prim(True) def __infer__(self, x): @@ -1190,7 +1190,7 @@ class Argmax(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=-1, output_type=mstype.int32): - """init Argmax""" + """Initialize Argmax""" self.init_prim_io_names(inputs=['x'], outputs=['output']) validator.check_value_type("axis", axis, [int], self.name) validator.check_type_same({'output': output_type}, [mstype.int32, mstype.int64], self.name) @@ -1238,7 +1238,7 @@ class Argmin(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=-1, output_type=mstype.int32): - """init Argmin""" + """Initialize Argmin""" self.init_prim_io_names(inputs=['x'], outputs=['output']) validator.check_value_type("axis", axis, [int], self.name) validator.check_type_name("output_type", output_type, [mstype.int32, mstype.int64], self.name) @@ -1294,7 +1294,7 @@ class ArgMaxWithValue(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=0, keep_dims=False): - """init ArgMaxWithValue""" + """Initialize ArgMaxWithValue""" self.axis = axis self.keep_dims = keep_dims validator.check_value_type('keep_dims', keep_dims, [bool], self.name) @@ -1346,7 +1346,7 @@ class ArgMinWithValue(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=0, keep_dims=False): - """init ArgMinWithValue""" + """Initialize ArgMinWithValue""" self.axis = axis self.keep_dims = keep_dims validator.check_value_type('keep_dims', keep_dims, [bool], self.name) @@ -1405,7 +1405,7 @@ class Tile(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Tile""" + """Initialize Tile""" self.init_prim_io_names(inputs=['x', 'multiples'], outputs=['output']) def check_elim(self, base_tensor, multiplier): @@ -1453,7 +1453,7 @@ class UnsortedSegmentSum(PrimitiveWithInfer): range. If the sum of the given segment_ids :math:`i` is empty, then :math:`\text{output}[i] = 0`. If the given segment_ids - is negative, the value will be ignored. 'num_segments' should be equal to the number of different segment_ids. + is negative, the value will be ignored. 'num_segments' must be equal to the number of different segment_ids. Inputs: - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. @@ -1473,7 +1473,7 @@ class UnsortedSegmentSum(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init UnsortedSegmentSum""" + """Initialize UnsortedSegmentSum""" self.init_prim_io_names(inputs=['x', 'segment_ids', 'num_segments'], outputs=['y']) def __infer__(self, x, segment_ids, num_segments): @@ -1510,13 +1510,13 @@ class UnsortedSegmentMin(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. - The data type should be float16, float32 or int32. - - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`, the value should be >= 0. + The data type must be float16, float32 or int32. + - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`, the value must be >= 0. The data type must be int32. - **num_segments** (int) - The value spcifies the number of distinct `segment_ids`. Outputs: - Tensor. 
Set the number of `num_segments` as `N`, the shape is :math:`(N, x_2, ..., x_R)`. + Tensor, set the number of `num_segments` as `N`, the shape is :math:`(N, x_2, ..., x_R)`. Examples: >>> input_x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [4, 2, 1]]).astype(np.float32)) @@ -1529,7 +1529,7 @@ class UnsortedSegmentMin(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init UnsortedSegmentMin""" + """Initialize UnsortedSegmentMin""" self.init_prim_io_names(inputs=['x', 'segment_ids', 'num_segments'], outputs=['y']) def __infer__(self, x, segment_ids, num_segments): @@ -1561,10 +1561,10 @@ class UnsortedSegmentProd(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - The shape is :math:`(x_1, x_2, ..., x_R)`. With float16, float32 or int32 data type. - - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`, the value should be >= 0. + - **segment_ids** (Tensor) - A `1-D` tensor whose shape is :math:`(x_1)`, the value must be >= 0. Data type must be int32. - **num_segments** (int) - The value spcifies the number of distinct `segment_ids`, - should be greater than 0. + must be greater than 0. Outputs: Tensor, set the number of `num_segments` as `N`, the shape is :math:`(N, x_2, ..., x_R)`. @@ -1580,7 +1580,7 @@ class UnsortedSegmentProd(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init UnsortedSegmentProd""" + """Initialize UnsortedSegmentProd""" self.init_prim_io_names(inputs=['x', 'segment_ids', 'num_segments'], outputs=['y']) def __infer__(self, x, segment_ids, num_segments): @@ -1609,9 +1609,9 @@ class UnsortedSegmentProd(PrimitiveWithInfer): class Concat(PrimitiveWithInfer): r""" - Concat tensor in specified axis. + Concats tensor in specified axis. - Concat input tensors along with the given axis. + Concats input tensors along with the given axis. Note: The input data is a tuple of tensors. These tensors have the same rank `R`. Set the given axis as `m`, and @@ -1644,7 +1644,7 @@ class Concat(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=0): - """init Tile""" + """Initialize Tile""" validator.check_value_type("axis", axis, [int], self.name) def __infer__(self, input_x): @@ -1664,9 +1664,9 @@ class Concat(PrimitiveWithInfer): class ParallelConcat(PrimitiveWithInfer): r""" - Concat tensor in the first dimension. + Concats tensor in the first dimension. - Concat input tensors along with the first dimension. + Concats input tensors along with the first dimension. Note: The input tensors are all required to have size 1 in the first dimension. @@ -1688,7 +1688,7 @@ class ParallelConcat(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ParallelConcat""" + """Initialize ParallelConcat""" def __infer__(self, values): x_shp = values['shape'] @@ -1770,7 +1770,7 @@ class Pack(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=0): - """init Pack""" + """Initialize Pack""" validator.check_value_type("axis", axis, [int], self.name) self.axis = axis @@ -1819,7 +1819,7 @@ class Unpack(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=0): - """init Unpack""" + """Initialize Unpack""" validator.check_value_type("axis", axis, [int], self.name) self.axis = axis @@ -1853,7 +1853,7 @@ class Unpack(PrimitiveWithInfer): class Slice(PrimitiveWithInfer): """ - Slice a tensor in the specified shape. + Slices a tensor in the specified shape. Args: x (Tensor): The target tensor. 
@@ -1872,7 +1872,7 @@ class Slice(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init slice""" + """Initialize slice""" self.init_prim_io_names(inputs=['x', 'begin', 'size'], outputs=['output']) def __infer__(self, x, begin, size): @@ -1900,7 +1900,7 @@ class Slice(PrimitiveWithInfer): class ReverseV2(PrimitiveWithInfer): """ - Reverse specific dimensions of a tensor. + Reverses specific dimensions of a tensor. Args: axis (Union[tuple(int), list(int)): The indices of the dimensions to reverse. @@ -1939,7 +1939,7 @@ class ReverseV2(PrimitiveWithInfer): class Rint(PrimitiveWithInfer): """ - Return element-wise integer closest to x. + Returns element-wise integer closest to x. Inputs: - **input_x** (Tensor) - The target tensor, which must be one of the following types: @@ -1970,7 +1970,7 @@ class Rint(PrimitiveWithInfer): class Select(PrimitiveWithInfer): r""" - Return the selected elements, either from input :math:`x` or input :math:`y`, depending on the `condition`. + Returns the selected elements, either from input :math:`x` or input :math:`y`, depending on the `condition`. Given a tensor as input, this operation inserts a dimension of 1 at the dimension, if both :math:`x` and :math:`y` are none, the operation returns the coordinates of the true @@ -1987,7 +1987,7 @@ class Select(PrimitiveWithInfer): first dimension of :math:`x`, or must have the same shape as :math:`y`. The conditional tensor acts as an optional compensation (mask), which - determines whether the corresponding element / row in the output should be + determines whether the corresponding element / row in the output must be selected from :math:`x` (if true) or :math:`y` (if false) based on the value of each element. @@ -2045,7 +2045,7 @@ class Select(PrimitiveWithInfer): def _compute_slicing_length(begin, end, stride, x_shape, i): - """Compute the length of the slicing.""" + """Computes the length of the slicing.""" if i >= len(x_shape): raise ValueError(f"For 'StridedSlice', When their is no new axis, the index length must be less or " f"equal than the dim of x.") @@ -2098,7 +2098,7 @@ def _compute_slicing_length(begin, end, stride, x_shape, i): class StridedSlice(PrimitiveWithInfer): r""" - Extract a strided slice of a tensor. + Extracts a strided slice of a tensor. Given an input tensor, this operation inserts a dimension of length 1 at the dimension. This operation extracts a fragment of size (end-begin)/stride from the given 'input_tensor'. @@ -2107,7 +2107,7 @@ class StridedSlice(PrimitiveWithInfer): Note: The stride may be negative value, which causes reverse slicing. - The shape of `begin`, `end` and `strides` should be the same. + The shape of `begin`, `end` and `strides` must be the same. Args: begin_mask (int): Starting index of the slice. Default: 0. 
@@ -2157,7 +2157,7 @@ class StridedSlice(PrimitiveWithInfer):
ellipsis_mask=0,
new_axis_mask=0,
shrink_axis_mask=0):
- """Init StrideSlice"""
+ """Initialize StridedSlice"""
self.init_prim_io_names(inputs=['x', 'begin', 'end', 'strides'], outputs=['output'])
validator.check_integer('begin_mask', begin_mask, 0, Rel.GE, self.name)
validator.check_integer('end_mask', end_mask, 0, Rel.GE, self.name)
@@ -2192,7 +2192,7 @@ class StridedSlice(PrimitiveWithInfer):
'value': value}
def _compute_slicing_shape(self, x_shape, begin_v, end_v, strides_v):
- """Compute the shape of the slicing."""
+ """Computes the shape of the slicing."""
x_rank = len(x_shape)
slice_len = len(begin_v)
@@ -2275,14 +2275,14 @@ class StridedSlice(PrimitiveWithInfer):
class Diag(PrimitiveWithInfer):
r"""
- Construct a diagonal tensor with a given diagonal values.
+ Constructs a diagonal tensor with the given diagonal values.
Assume `input_x` has dimensions :math:`[D_1,... D_k]`, the output is a tensor of
rank 2k with dimensions :math:`[D_1,..., D_k, D_1,..., D_k]` where:
:math:`output[i_1,..., i_k, i_1,..., i_k] = input_x[i_1,..., i_k]` and 0 everywhere else.
Inputs:
- - **input_x** (Tensor) - The input tensor. The input shape should be less than 5d.
+ - **input_x** (Tensor) - The input tensor. The input shape must be less than 5d.
Outputs:
Tensor, has the same dtype as the `input_x`.
@@ -2299,7 +2299,7 @@ class Diag(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init Diag"""
+ """Initialize Diag"""
def infer_dtype(self, x_type):
validator.check_subclass('input_x', x_type, mstype.tensor, self.name)
@@ -2324,7 +2324,7 @@ class Diag(PrimitiveWithInfer):
class DiagPart(PrimitiveWithInfer):
r"""
- Extract the diagonal part from given tensor.
+ Extracts the diagonal part from a given tensor.
Assume input has dimensions :math:`[D_1,..., D_k, D_1,..., D_k]`, the output is a tensor
of rank k with dimensions :math:`[D_1,..., D_k]` where:
@@ -2348,7 +2348,7 @@ class DiagPart(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init DiagPart"""
+ """Initialize DiagPart"""
def infer_dtype(self, x_type):
validator.check_subclass('input_x', x_type, mstype.tensor, self.name)
@@ -2379,7 +2379,7 @@ class DiagPart(PrimitiveWithInfer):
class Eye(PrimitiveWithInfer):
"""
- Create a tensor with ones on the diagonal and zeros the rest.
+ Creates a tensor with ones on the diagonal and zeros elsewhere.
Inputs:
- **n** (int) - The number of rows of returned tensor
@@ -2398,7 +2398,7 @@ class Eye(PrimitiveWithInfer):
@prim_attr_register
def __init__(self):
- """init Eye"""
+ """Initialize Eye"""
def infer_value(self, n, m, t):
validator.check_integer("n", n, 0, Rel.GT, self.name)
@@ -2412,9 +2412,9 @@ class Eye(PrimitiveWithInfer):
class ScatterNd(PrimitiveWithInfer):
"""
- Scatter a tensor into a new tensor depending on the specified indices.
+ Scatters a tensor into a new tensor depending on the specified indices.
- Create an empty tensor, and set values by scattering the update tensor depending on indices.
+ Creates an empty tensor and sets values by scattering the update tensor depending on indices.
Inputs:
- **indices** (Tensor) - The index of scattering in the new tensor with int32 data type.
@@ -2434,7 +2434,7 @@ class ScatterNd(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init ScatterNd""" + """Initialize ScatterNd""" self.init_prim_io_names(inputs=['indices', 'update', 'shape'], outputs=['output']) def __infer__(self, indices, update, shape): @@ -2456,9 +2456,9 @@ class ScatterNd(PrimitiveWithInfer): class ResizeNearestNeighbor(PrimitiveWithInfer): r""" - Resize the input tensor by using nearest neighbor algorithm. + Resizes the input tensor by using nearest neighbor algorithm. - Resize the input tensor to a given size by using the nearest neighbor algorithm. The nearest + Resizes the input tensor to a given size by using the nearest neighbor algorithm. The nearest neighbor algorithm selects the value of the nearest point and does not consider the values of neighboring points at all, yielding a piecewise-constant interpolant. @@ -2481,7 +2481,7 @@ class ResizeNearestNeighbor(PrimitiveWithInfer): @prim_attr_register def __init__(self, size, align_corners=False): - """Init ResizeNearestNeighbor""" + """Initialize ResizeNearestNeighbor""" validator.check_value_type("size", size, [tuple, list], self.name) validator.check_value_type("align_corners", align_corners, [bool], self.name) validator.check_integer("length of size", len(size), 2, Rel.EQ, self.name) @@ -2501,7 +2501,7 @@ class ResizeNearestNeighbor(PrimitiveWithInfer): class GatherNd(PrimitiveWithInfer): """ - Gather slices from a tensor by indices. + Gathers slices from a tensor by indices. Using given indices to gather slices from a tensor with a specified shape. @@ -2522,7 +2522,7 @@ class GatherNd(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init GatherNd""" + """Initialize GatherNd""" self.init_prim_io_names(inputs=['input_x', 'indices'], outputs=['y']) def infer_shape(self, x_shape, indices_shape): @@ -2538,7 +2538,7 @@ class GatherNd(PrimitiveWithInfer): class TensorScatterUpdate(PrimitiveWithInfer): """ - Update tensor value using given values, along with the input indices. + Updates tensor value using given values, along with the input indices. Inputs: - **input_x** (Tensor) - The target tensor. The dimension of input_x must be equal to indices.shape[-1]. @@ -2561,7 +2561,7 @@ class TensorScatterUpdate(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init TensorScatterUpdate""" + """Initialize TensorScatterUpdate""" self.init_prim_io_names(inputs=['x', 'indices', 'value'], outputs=['y']) def infer_shape(self, x_shape, indices_shape, value_shape): @@ -2580,7 +2580,7 @@ class TensorScatterUpdate(PrimitiveWithInfer): class ScatterUpdate(_ScatterOp): """ - Update tensor value by using input indices and value. + Updates tensor value by using input indices and value. Using given values to update tensor value, along with the input indices. @@ -2615,7 +2615,7 @@ class ScatterUpdate(_ScatterOp): @prim_attr_register def __init__(self, use_locking=True): - """Init ScatterUpdate""" + """Initialize ScatterUpdate""" validator.check_value_type('use_locking', use_locking, [bool], self.name) self.init_prim_io_names(inputs=['x', 'indices', 'updates'], outputs=['y']) @@ -2628,7 +2628,7 @@ class ScatterUpdate(_ScatterOp): class ScatterNdUpdate(_ScatterNdOp): """ - Update tensor value by using input indices and value. + Updates tensor value by using input indices and value. Using given values to update tensor value, along with the input indices. 
@@ -2660,7 +2660,7 @@ class ScatterNdUpdate(_ScatterNdOp): @prim_attr_register def __init__(self, use_locking=True): - """Init ScatterNdUpdate""" + """Initialize ScatterNdUpdate""" validator.check_value_type('use_locking', use_locking, [bool], self.name) self.init_prim_io_names(inputs=['x', 'indices', 'value'], outputs=['y']) @@ -2673,7 +2673,7 @@ class ScatterNdUpdate(_ScatterNdOp): class ScatterMax(_ScatterOp): """ - Update the value of the input tensor through the max operation. + Updates the value of the input tensor through the max operation. Using given values to update tensor value through the max operation, along with the input indices. This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. @@ -2688,7 +2688,7 @@ class ScatterMax(_ScatterOp): Inputs: - **input_x** (Parameter) - The target parameter. - - **indices** (Tensor) - The index to do max operation whose data type should be mindspore.int32. + - **indices** (Tensor) - The index to do max operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor that performs the maximum operation with `input_x`, the data type is the same as `input_x`, the shape is `indices_shape + x_shape[1:]`. @@ -2706,14 +2706,14 @@ class ScatterMax(_ScatterOp): @prim_attr_register def __init__(self, use_locking=True): - """Init ScatterMax""" + """Initialize ScatterMax""" self.init_prim_io_names(inputs=['x', 'indices', 'updates'], outputs=['y']) validator.check_value_type('use_locking', use_locking, (bool,), self.name) class ScatterMin(_ScatterOp): """ - Update the value of the input tensor through the min operation. + Updates the value of the input tensor through the min operation. Using given values to update tensor value through the min operation, along with the input indices. This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. @@ -2728,7 +2728,7 @@ class ScatterMin(_ScatterOp): Inputs: - **input_x** (Parameter) - The target parameter. - - **indices** (Tensor) - The index to do min operation whose data type should be mindspore.int32. + - **indices** (Tensor) - The index to do min operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor doing the min operation with `input_x`, the data type is same as `input_x`, the shape is `indices_shape + x_shape[1:]`. @@ -2747,7 +2747,7 @@ class ScatterMin(_ScatterOp): class ScatterAdd(_ScatterOp): """ - Update the value of the input tensor through the add operation. + Updates the value of the input tensor through the add operation. Using given values to update tensor value through the add operation, along with the input indices. This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. @@ -2762,7 +2762,7 @@ class ScatterAdd(_ScatterOp): Inputs: - **input_x** (Parameter) - The target parameter. - - **indices** (Tensor) - The index to do add operation whose data type should be mindspore.int32. + - **indices** (Tensor) - The index to do add operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor that performs the add operation with `input_x`, the data type is the same as `input_x`, the shape is `indices_shape + x_shape[1:]`. @@ -2781,7 +2781,7 @@ class ScatterAdd(_ScatterOp): class ScatterSub(_ScatterOp): """ - Update the value of the input tensor through the subtraction operation. + Updates the value of the input tensor through the subtraction operation. 
Using given values to update tensor value through the subtraction operation, along with the input indices. This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. @@ -2797,7 +2797,7 @@ class ScatterSub(_ScatterOp): Inputs: - **input_x** (Parameter) - The target parameter. - **indices** (Tensor) - The index to perform the subtraction operation - whose data type should be mindspore.int32. + whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor that performs the subtraction operation with `input_x`, the data type is the same as `input_x`, the shape is `indices_shape + x_shape[1:]`. @@ -2816,7 +2816,7 @@ class ScatterSub(_ScatterOp): class ScatterMul(_ScatterOp): """ - Update the value of the input tensor through the mul operation. + Updates the value of the input tensor through the mul operation. Using given values to update tensor value through the mul operation, along with the input indices. This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. @@ -2831,7 +2831,7 @@ class ScatterMul(_ScatterOp): Inputs: - **input_x** (Parameter) - The target parameter. - - **indices** (Tensor) - The index to do mul operation whose data type should be mindspore.int32. + - **indices** (Tensor) - The index to do mul operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor doing the mul operation with `input_x`, the data type is same as `input_x`, the shape is `indices_shape + x_shape[1:]`. @@ -2850,7 +2850,7 @@ class ScatterMul(_ScatterOp): class ScatterDiv(_ScatterOp): """ - Update the value of the input tensor through the div operation. + Updates the value of the input tensor through the div operation. Using given values to update tensor value through the div operation, along with the input indices. This operation outputs the `input_x` after the update is done, which makes it convenient to use the updated value. @@ -2865,7 +2865,7 @@ class ScatterDiv(_ScatterOp): Inputs: - **input_x** (Parameter) - The target parameter. - - **indices** (Tensor) - The index to do div operation whose data type should be mindspore.int32. + - **indices** (Tensor) - The index to do div operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor that performs the div operation with `input_x`, the data type is the same as `input_x`, the shape is `indices_shape + x_shape[1:]`. @@ -2899,7 +2899,7 @@ class ScatterNdAdd(_ScatterNdOp): Inputs: - **input_x** (Parameter) - The target parameter. - - **indices** (Tensor) - The index to do add operation whose data type should be mindspore.int32. + - **indices** (Tensor) - The index to do add operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor doing the add operation with `input_x`, the data type is same as `input_x`, the shape is `indices_shape[:-1] + x_shape[indices_shape[-1]:]`. @@ -2933,7 +2933,7 @@ class ScatterNdSub(_ScatterNdOp): Inputs: - **input_x** (Parameter) - The target parameter. - - **indices** (Tensor) - The index to do add operation whose data type should be mindspore.int32. + - **indices** (Tensor) - The index to do add operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor that performs the subtraction operation with `input_x`, the data type is the same as `input_x`, the shape is `indices_shape[:-1] + x_shape[indices_shape[-1]:]`. 
@@ -2963,8 +2963,8 @@ class ScatterNonAliasingAdd(_ScatterNdOp): RuntimeError exception will be thrown when the data type conversion of Parameter is required. Inputs: - - **input_x** (Parameter) - The target parameter. The data type should be float16, float32 or int32. - - **indices** (Tensor) - The index to perform the addition operation whose data type should be mindspore.int32. + - **input_x** (Parameter) - The target parameter. The data type must be float16, float32 or int32. + - **indices** (Tensor) - The index to perform the addition operation whose data type must be mindspore.int32. - **updates** (Tensor) - The tensor that performs the addition operation with `input_x`, the data type is the same as `input_x`, the shape is `indices_shape[:-1] + x_shape[indices_shape[-1]:]`. @@ -2982,7 +2982,7 @@ class ScatterNonAliasingAdd(_ScatterNdOp): @prim_attr_register def __init__(self): - """Init ScatterNonAliasingAdd""" + """Initialize ScatterNonAliasingAdd""" self.init_prim_io_names(inputs=['x', 'indices', 'updates'], outputs=['y']) def infer_dtype(self, x_dtype, indices_dtype, updates_dtype): @@ -2994,7 +2994,7 @@ class ScatterNonAliasingAdd(_ScatterNdOp): class SpaceToDepth(PrimitiveWithInfer): r""" - Rearrange blocks of spatial data into depth. + Rearranges blocks of spatial data into depth. The output tensor's `height` dimension is :math:`height / block\_size`. @@ -3024,7 +3024,7 @@ class SpaceToDepth(PrimitiveWithInfer): @prim_attr_register def __init__(self, block_size): - """Init SpaceToDepth""" + """Initialize SpaceToDepth""" self.init_prim_io_names(inputs=['x'], outputs=['y']) validator.check_value_type('block_size', block_size, [int], self.name) validator.check('block_size', block_size, '', 2, Rel.GE) @@ -3049,7 +3049,7 @@ class SpaceToDepth(PrimitiveWithInfer): class DepthToSpace(PrimitiveWithInfer): r""" - Rearrange blocks of depth data into spatial dimensions. + Rearranges blocks of depth data into spatial dimensions. This is the reverse operation of SpaceToDepth. @@ -3081,7 +3081,7 @@ class DepthToSpace(PrimitiveWithInfer): @prim_attr_register def __init__(self, block_size): - """Init DepthToSpace""" + """Initialize DepthToSpace""" self.init_prim_io_names(inputs=['x'], outputs=['y']) validator.check_value_type('block_size', block_size, [int], self.name) validator.check('block_size', block_size, '', 2, Rel.GE, self.name) @@ -3106,7 +3106,7 @@ class DepthToSpace(PrimitiveWithInfer): class SpaceToBatch(PrimitiveWithInfer): r""" - Divide spatial dimensions into blocks and combine the block size with the original batch. + Divides spatial dimensions into blocks and combine the block size with the original batch. This operation will divide spatial dimensions (H, W) into blocks with `block_size`, the output tensor's H and W dimension is the corresponding number of blocks after division. The output tensor's batch dimension is the @@ -3149,7 +3149,7 @@ class SpaceToBatch(PrimitiveWithInfer): @prim_attr_register def __init__(self, block_size, paddings): - """Init SpaceToBatch""" + """Initialize SpaceToBatch""" validator.check_value_type('block_size', block_size, [int], self.name) validator.check('block_size', block_size, '', 2, Rel.GE, self.name) self.block_size = block_size @@ -3178,7 +3178,7 @@ class SpaceToBatch(PrimitiveWithInfer): class BatchToSpace(PrimitiveWithInfer): r""" - Divide batch dimension with blocks and interleaves these blocks back into spatial dimensions. + Divides batch dimension with blocks and interleaves these blocks back into spatial dimensions. 
This operation will divide batch dimension N into blocks with block_size, the output tensor's N dimension is the
corresponding number of blocks after division. The output tensor's H, W dimension is product of original H, W
@@ -3193,7 +3193,7 @@ class BatchToSpace(PrimitiveWithInfer):
input_shape[i+2]*block_size >= crops[i][0]+crops[i][1].
Inputs:
- - **input_x** (Tensor) - The input tensor. It must be a 4-D tensor, dimension 0 should be divisible by
+ - **input_x** (Tensor) - The input tensor. It must be a 4-D tensor, dimension 0 must be divisible by
product of `block_shape`.
Outputs:
@@ -3220,7 +3220,7 @@ class BatchToSpace(PrimitiveWithInfer):
@prim_attr_register
def __init__(self, block_size, crops):
- """Init BatchToSpace"""
+ """Initialize BatchToSpace"""
validator.check_value_type('block_size', block_size, [int], self.name)
validator.check('block_size', block_size, '', 2, Rel.GE, self.name)
self.block_size = block_size
@@ -3253,7 +3253,7 @@ class BatchToSpace(PrimitiveWithInfer):
class SpaceToBatchND(PrimitiveWithInfer):
r"""
- Divide spatial dimensions into blocks and combine the block size with the original batch.
+ Divides spatial dimensions into blocks and combines the block size with the original batch.
This operation will divide spatial dimensions (H, W) into blocks with block_shape, the output tensor's H and
W dimension is the corresponding number of blocks after division. The output tensor's batch dimension is the
@@ -3296,7 +3296,7 @@ class SpaceToBatchND(PrimitiveWithInfer):
@prim_attr_register
def __init__(self, block_shape, paddings):
- """Init SpaceToBatchND"""
+ """Initialize SpaceToBatchND"""
self.ori_block_shape = block_shape
self.ori_paddings = paddings
validator.check_value_type('block_shape type', block_shape, [list, tuple], self.name)
@@ -3347,7 +3347,7 @@ class SpaceToBatchND(PrimitiveWithInfer):
class BatchToSpaceND(PrimitiveWithInfer):
r"""
- Divide batch dimension with blocks and interleave these blocks back into spatial dimensions.
+ Divides batch dimension with blocks and interleaves these blocks back into spatial dimensions.
This operation will divide batch dimension N into blocks with block_shape, the output tensor's N dimension is the
corresponding number of blocks after division. The output tensor's H, W dimension is product of original H, W
@@ -3362,7 +3362,7 @@ class BatchToSpaceND(PrimitiveWithInfer):
input dimension i+2. It is required that
input_shape[i+2]*block_shape[i] > crops[i][0]+crops[i][1].
Inputs:
- - **input_x** (Tensor) - The input tensor. It must be a 4-D tensor, dimension 0 should be divisible by
+ - **input_x** (Tensor) - The input tensor. It must be a 4-D tensor, dimension 0 must be divisible by
product of `block_shape`.
Outputs: @@ -3389,7 +3389,7 @@ class BatchToSpaceND(PrimitiveWithInfer): @prim_attr_register def __init__(self, block_shape, crops): - """Init BatchToSpaceND""" + """Initialize BatchToSpaceND""" self.ori_block_shape = block_shape self.ori_crops = crops validator.check_value_type('block_shape type', block_shape, [list, tuple], self.name) @@ -3434,7 +3434,7 @@ class BatchToSpaceND(PrimitiveWithInfer): out_shape[i + offset] = x_block_prod - crops_sum if out_shape[0] % block_shape_prod != 0: - raise ValueError(f'For \'{self.name}\' input_x dimension 0 {out_shape[0]} should be divisible by ' + raise ValueError(f'For \'{self.name}\' input_x dimension 0 {out_shape[0]} should be divisible by ' f'block_shape_prod {block_shape_prod}') out_shape[0] = out_shape[0] // block_shape_prod return out_shape @@ -3465,7 +3465,7 @@ class BroadcastTo(PrimitiveWithInfer): @prim_attr_register def __init__(self, shape): - """Init BroadcastTo""" + """Initialize BroadcastTo""" validator.check_value_type("shape", shape, (tuple), self.name) validator.check("shape length", len(shape), "", 0, Rel.GT, self.name) for i in shape: @@ -3518,7 +3518,7 @@ class InplaceUpdate(PrimitiveWithInfer): @prim_attr_register def __init__(self, indices): - """Init InplaceUpdate""" + """Initialize InplaceUpdate""" self.init_prim_io_names(inputs=['x', 'v'], outputs=['y']) self.indices = indices validator.check_value_type("indices", indices, [int, tuple], self.name) @@ -3573,7 +3573,7 @@ class ReverseSequence(PrimitiveWithInfer): @prim_attr_register def __init__(self, seq_dim, batch_dim=0): - """init ReverseSequence""" + """Initialize ReverseSequence""" self.init_prim_io_names(inputs=['x', 'seq_lengths'], outputs=['y']) validator.check_value_type("seq_dim", seq_dim, [int], self.name) self.seq_dim_ = seq_dim @@ -3600,7 +3600,7 @@ class EditDistance(PrimitiveWithInfer): Computes the Levebshtein Edit Distance. It is used to measure the similarity of two sequences. Args: - normalize (bool): If True, edit distances are normalized by length of truth. Default: True. + normalize (bool): If true, edit distances are normalized by length of truth. Default: True. Inputs: - **hypothesis_indices** (Tensor) - The indices of the hypothesis list SparseTensor. With int64 data type. @@ -3643,7 +3643,7 @@ class EditDistance(PrimitiveWithInfer): @prim_attr_register def __init__(self, normalize=True): - """init EditDistance""" + """Initialize EditDistance""" self.normalize = validator.check_value_type("normalize", normalize, [bool], self.name) def __infer__(self, h_indices, h_values, h_shape, truth_indices, truth_values, truth_shape): @@ -3684,7 +3684,7 @@ class EditDistance(PrimitiveWithInfer): class TransShape(PrimitiveWithInfer): """ - Transform the shape of input tensor to target shape. + Transforms the shape of input tensor to target shape. Inputs: - **input_x** (Tensor) - A input tensor. 
@@ -3733,7 +3733,7 @@ class Sort(PrimitiveWithInfer): @prim_attr_register def __init__(self, axis=-1, descending=False): - """init Sort""" + """Initialize Sort""" self.axis = validator.check_value_type("axis", axis, [int], self.name) self.descending = validator.check_value_type("descending", descending, [bool], self.name) @@ -3773,7 +3773,7 @@ class EmbeddingLookup(PrimitiveWithInfer): """ @prim_attr_register def __init__(self): - """init index_select""" + """Initialize index_select""" self.__setattr_flag__ = True self.init_prim_io_names(inputs=['params', 'indices', 'offset'], outputs=['output']) diff --git a/mindspore/ops/operations/comm_ops.py b/mindspore/ops/operations/comm_ops.py index ccd55f6866..42cdee7e26 100644 --- a/mindspore/ops/operations/comm_ops.py +++ b/mindspore/ops/operations/comm_ops.py @@ -416,7 +416,7 @@ class _AlltoAll(PrimitiveWithInfer): @prim_attr_register def __init__(self, split_count, split_dim, concat_dim, group=GlobalComm.WORLD_COMM_GROUP): - """init AlltoAll""" + """Initialize AlltoAll""" validator.check_value_type('group', _get_group(group), (str,), self.name) self.split_count = split_count self.split_dim = split_dim @@ -520,7 +520,7 @@ class _GetTensorSlice(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ChunkTensor""" + """Initialize ChunkTensor""" def infer_value(self, x, dev_mat, tensor_map): from mindspore.parallel._tensor import _load_tensor diff --git a/mindspore/ops/operations/control_ops.py b/mindspore/ops/operations/control_ops.py index 12bad28b30..11d6168925 100644 --- a/mindspore/ops/operations/control_ops.py +++ b/mindspore/ops/operations/control_ops.py @@ -27,8 +27,8 @@ class ControlDepend(Primitive): In many cases, we need to control the execution order of operations. ControlDepend is designed for this. ControlDepend will instruct the execution engine to run the operations in a specific order. ControlDepend - tells the engine that the destination operations should depend on the source operation which means the source - operations should be executed before the destination. + tells the engine that the destination operations must depend on the source operation which means the source + operations must be executed before the destination. Note: This operation does not work in `PYNATIVE_MODE`. @@ -86,7 +86,7 @@ class GeSwitch(PrimitiveWithInfer): Inputs: - **data** (Union[Tensor, Number]) - The data to be used for switch control. - - **pred** (Tensor) - It should be a scalar whose type is bool and shape is `()`, It is used as condition for + - **pred** (Tensor) - It must be a scalar whose type is bool and shape is `()`, It is used as condition for switch control. Outputs: tuple. Output is tuple(false_output, true_output). The Elements in the tuple has the same shape of input data. @@ -142,10 +142,10 @@ class Merge(PrimitiveWithInfer): """ Merges all input data to one. - One and only one of the inputs should be selected as the output + One and only one of the inputs must be selected as the output Inputs: - - **inputs** (Union(Tuple, List)) - The data to be merged. All tuple elements should have the same data type. + - **inputs** (Union(Tuple, List)) - The data to be merged. All tuple elements must have the same data type. Outputs: tuple. Output is tuple(`data`, `output_index`). The `data` has the same shape of `inputs` element. 
diff --git a/mindspore/ops/operations/debug_ops.py b/mindspore/ops/operations/debug_ops.py index 2d7ffc466c..37619003c0 100644 --- a/mindspore/ops/operations/debug_ops.py +++ b/mindspore/ops/operations/debug_ops.py @@ -22,7 +22,7 @@ from ..primitive import prim_attr_register, PrimitiveWithInfer, Primitive def _check_summary_param(name, value, class_name): - """Check the name and value is valid for summary.""" + """Checks the name and value is valid for summary.""" n_type = name['dtype'] n_value = name['value'] validator.check_value_type('name', n_type, [type(mstype.string)], class_name) @@ -42,11 +42,11 @@ SUMMARY_RETURN_VALUE = {'dtype': mstype.int32, 'shape': [1], 'value': None} class ScalarSummary(PrimitiveWithInfer): """ - Output a scalar to a protocol buffer through a scalar summary operator. + Outputs a scalar to a protocol buffer through a scalar summary operator. Inputs: - - **name** (str) - The name of the input variable, it should not be an empty string. - - **value** (Tensor) - The value of scalar, and the shape of value should be [] or [1]. + - **name** (str) - The name of the input variable, it must not be an empty string. + - **value** (Tensor) - The value of scalar, and the shape of value must be [] or [1]. Examples: >>> class SummaryDemo(nn.Cell): @@ -80,11 +80,11 @@ class ScalarSummary(PrimitiveWithInfer): class ImageSummary(PrimitiveWithInfer): """ - Output image tensor to protocol buffer through image summary operator. + Outputs image tensor to protocol buffer through image summary operator. Inputs: - - **name** (str) - The name of the input variable, it should not be an empty string. - - **value** (Tensor) - The value of image, the rank of tensor should be 4. + - **name** (str) - The name of the input variable, it must not be an empty string. + - **value** (Tensor) - The value of image, the rank of tensor must be 4. Examples: >>> class Net(nn.Cell): @@ -117,11 +117,11 @@ class ImageSummary(PrimitiveWithInfer): class TensorSummary(PrimitiveWithInfer): """ - Output a tensor to a protocol buffer through a tensor summary operator. + Outputs a tensor to a protocol buffer through a tensor summary operator. Inputs: - **name** (str) - The name of the input variable. - - **value** (Tensor) - The value of tensor, and the rank of tensor should be greater than 0. + - **value** (Tensor) - The value of tensor, and the rank of tensor must be greater than 0. Examples: >>> class SummaryDemo(nn.Cell): @@ -155,11 +155,11 @@ class TensorSummary(PrimitiveWithInfer): class HistogramSummary(PrimitiveWithInfer): """ - Output tensor to protocol buffer through histogram summary operator. + Outputs tensor to protocol buffer through histogram summary operator. Inputs: - **name** (str) - The name of the input variable. - - **value** (Tensor) - The value of tensor, and the rank of tensor should be greater than 0. + - **value** (Tensor) - The value of tensor, and the rank of tensor must be greater than 0. Examples: >>> class SummaryDemo(nn.Cell): @@ -193,7 +193,7 @@ class HistogramSummary(PrimitiveWithInfer): class InsertGradientOf(PrimitiveWithInfer): """ - Attach callback to graph node that will be invoked on the node's gradient. + Attaches callback to graph node that will be invoked on the node's gradient. Args: f (Function): MindSpore's Function. Callback function. @@ -252,7 +252,7 @@ class HookBackward(PrimitiveWithInfer): is only supported in Pynative Mode. 
Note: - The hook function should be defined like `hook_fn(grad) -> Tensor or None`, + The hook function must be defined like `hook_fn(grad) -> Tensor or None`, where grad is the gradient passed to the primitive and gradient may be modified and passed to next primitive. The difference between a hook function and callback of InsertGradientOf is that a hook function is executed in the python @@ -305,7 +305,7 @@ class HookBackward(PrimitiveWithInfer): class Print(PrimitiveWithInfer): """ - Output tensor or string to stdout. + Outputs tensor or string to stdout. Note: In pynative mode, please use python print function. @@ -344,7 +344,7 @@ class Print(PrimitiveWithInfer): class Debug(Primitive): """ - Print tensor value. + Prints tensor value. Inputs: - **value** (Tensor) - The value of tensor. @@ -395,7 +395,7 @@ class Assert(PrimitiveWithInfer): @prim_attr_register def __init__(self, summarize=3): - """init Assert""" + """Initialize Assert""" self.summarize = validator.check_value_type("summarize", summarize, [int], self.name) def infer_shape(self, condition, inputs): diff --git a/mindspore/ops/operations/image_ops.py b/mindspore/ops/operations/image_ops.py index e5f306a693..d9547c230c 100644 --- a/mindspore/ops/operations/image_ops.py +++ b/mindspore/ops/operations/image_ops.py @@ -26,7 +26,7 @@ class CropAndResize(PrimitiveWithInfer): Extracts crops from the input image tensor and resizes them. Note: - In case that the output shape depends on crop_size, the crop_size should be constant. + In case that the output shape depends on crop_size, the crop_size must be constant. Args: method (str): An optional string that specifies the sampling method for resizing. @@ -79,7 +79,7 @@ class CropAndResize(PrimitiveWithInfer): @prim_attr_register def __init__(self, method="bilinear", extrapolation_value=0.0): - """init CropAndResize""" + """Initialize CropAndResize""" self.init_prim_io_names(inputs=['x', 'boxes', 'box_index', 'crop_size'], outputs=['y']) validator.check_value_type("method", method, [str], self.name) validator.check_string("method", method, ["bilinear", "nearest", "bilinear_v2"], self.name) diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 1f5430e576..f6f3b4afc7 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -70,7 +70,7 @@ class _BinaryOp(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init _BinaryOp""" + """Initialize _BinaryOp""" self.init_prim_io_names(inputs=['x', 'y'], outputs=['output']) def infer_shape(self, x_shape, y_shape): @@ -99,7 +99,7 @@ class _BitwiseBinaryOp(_MathBinaryOp): @prim_attr_register def __init__(self): - """init _BitwiseBinaryOp""" + """Initialize _BitwiseBinaryOp""" self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y']) @staticmethod @@ -166,7 +166,7 @@ class AssignAdd(PrimitiveWithInfer): Inputs: - **variable** (Parameter) - The `Parameter`. - **value** (Union[numbers.Number, Tensor]) - The value to be added to the `variable`. - It should have the same shape as `variable` if it is a Tensor. + It must have the same shape as `variable` if it is a Tensor. Examples: >>> class Net(Cell): @@ -190,7 +190,7 @@ class AssignAdd(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init AssignAdd""" + """Initialize AssignAdd""" self.init_prim_io_names(inputs=['ref', 'value'], outputs=['output']) def infer_shape(self, variable, value): @@ -216,7 +216,7 @@ class AssignSub(PrimitiveWithInfer): Inputs: - **variable** (Parameter) - The `Parameter`. 
- **value** (Union[numbers.Number, Tensor]) - The value to be subtracted from the `variable`. - It should have the same shape as `variable` if it is a Tensor. + It must have the same shape as `variable` if it is a Tensor. Examples: >>> class Net(Cell): @@ -241,7 +241,7 @@ class AssignSub(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init AssignSub""" + """Initialize AssignSub""" def infer_shape(self, variable, value): return value @@ -257,8 +257,8 @@ class _Reduce(PrimitiveWithInfer): Definition of base class of reduction class operators. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. """ __mindspore_signature__ = ( @@ -268,7 +268,7 @@ class _Reduce(PrimitiveWithInfer): @prim_attr_register def __init__(self, keep_dims=False): - """init Reduce""" + """Initialize Reduce""" validator.check_value_type('keep_dims', keep_dims, [bool], self.name) self.init_prim_io_names(inputs=['input_x', 'axis'], outputs=['y']) self.add_prim_attr("io_format", "ND") @@ -320,8 +320,8 @@ class ReduceMean(_Reduce): The dtype of the tensor to be reduced is number. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. Default: False. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default: False. Inputs: - **input_x** (Tensor[Number]) - The input tensor. @@ -352,8 +352,8 @@ class ReduceSum(_Reduce): The dtype of the tensor to be reduced is number. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. Default: False. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default: False. Inputs: - **input_x** (Tensor[Number]) - The input tensor. @@ -378,7 +378,7 @@ class ReduceSum(_Reduce): @prim_attr_register def __init__(self, keep_dims=False): - """init ReduceSum""" + """Initialize ReduceSum""" super(ReduceSum, self).__init__(keep_dims) self.__setattr_flag__ = True @@ -390,8 +390,8 @@ class ReduceAll(_Reduce): The dtype of the tensor to be reduced is bool. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default : False, don't keep these reduced dimensions. Inputs: @@ -426,8 +426,8 @@ class ReduceAny(_Reduce): The dtype of the tensor to be reduced is bool. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default : False, don't keep these reduced dimensions. Inputs: @@ -462,8 +462,8 @@ class ReduceMax(_Reduce): The dtype of the tensor to be reduced is number. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default : False, don't keep these reduced dimensions. 
Inputs: @@ -501,8 +501,8 @@ class ReduceMin(_Reduce): The dtype of the tensor to be reduced is number. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default : False, don't keep these reduced dimensions. Inputs: @@ -534,8 +534,8 @@ class ReduceProd(_Reduce): The dtype of the tensor to be reduced is number. Args: - keep_dims (bool): If True, keep these reduced dimensions and the length is 1. - If False, don't keep these dimensions. + keep_dims (bool): If true, keep these reduced dimensions and the length is 1. + If false, don't keep these dimensions. Default : False, don't keep these reduced dimensions. Inputs: @@ -565,8 +565,8 @@ class CumProd(PrimitiveWithInfer): Compute the cumulative product of the tensor x along axis. Args: - exclusive (bool): If True, perform exclusive cumulative product. Default: False. - reverse (bool): If True, reverse the result along axis. Default: False + exclusive (bool): If true, perform exclusive cumulative product. Default: False. + reverse (bool): If true, reverse the result along axis. Default: False. Inputs: - **input_x** (Tensor[Number]) - The input tensor. @@ -616,14 +616,14 @@ class MatMul(PrimitiveWithInfer): The rank of input tensors must be `2`. Args: - transpose_a (bool): If True, `a` is transposed before multiplication. Default: False. - transpose_b (bool): If True, `b` is transposed before multiplication. Default: False. + transpose_a (bool): If true, `a` is transposed before multiplication. Default: False. + transpose_b (bool): If true, `b` is transposed before multiplication. Default: False. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(N, C)`. If - `transpose_a` is True, its shape should be :math:`(N, C)` after transposing. + `transpose_a` is True, its shape must be :math:`(N, C)` after transposing. - **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(C, M)`. If - `transpose_b` is True, its shape should be :math:`(C, M)` after transpose. + `transpose_b` is True, its shape must be :math:`(C, M)` after transpose. Outputs: Tensor, the shape of the output tensor is :math:`(N, M)`. @@ -690,17 +690,17 @@ class BatchMatMul(MatMul): The two input tensors must have the same rank and the rank must be not less than `3`. Args: - transpose_a (bool): If True, the last two dimensions of `a` is transposed before multiplication. + transpose_a (bool): If true, the last two dimensions of `a` are transposed before multiplication. Default: False. - transpose_b (bool): If True, the last two dimensions of `b` is transposed before multiplication. + transpose_b (bool): If true, the last two dimensions of `b` are transposed before multiplication. Default: False. Inputs: - **input_x** (Tensor) - The first tensor to be multiplied. The shape of the tensor is :math:`(*B, N, C)`, where :math:`*B` represents the batch size which can be multidimensional, :math:`N` and :math:`C` are the - size of the last two dimensions. If `transpose_a` is True, its shape should be :math:`(*B, C, N)`. + size of the last two dimensions. If `transpose_a` is True, its shape must be :math:`(*B, C, N)`. - **input_y** (Tensor) - The second tensor to be multiplied. The shape of the tensor is :math:`(*B, C, M)`. If - `transpose_b` is True, its shape should be :math:`(*B, M, C)`.
+ `transpose_b` is True, its shape must be :math:`(*B, M, C)`. Outputs: Tensor, the shape of the output tensor is :math:`(*B, N, M)`. @@ -735,8 +735,8 @@ class CumSum(PrimitiveWithInfer): Computes the cumulative sum of input tensor along axis. Args: - exclusive (bool): If True, perform exclusive mode. Default: False. - reverse (bool): If True, perform inverse cumulative sum. Default: False. + exclusive (bool): If true, perform exclusive mode. Default: False. + reverse (bool): If true, perform inverse cumulative sum. Default: False. Inputs: - **input** (Tensor) - The input tensor to accumulate. @@ -758,7 +758,7 @@ class CumSum(PrimitiveWithInfer): @prim_attr_register def __init__(self, exclusive=False, reverse=False): - """init cumsum""" + """Initialize cumsum""" cls_name = self.name validator.check_value_type('exclusive', exclusive, [bool], cls_name) validator.check_value_type('reverse', reverse, [bool], cls_name) @@ -781,7 +781,7 @@ class AddN(PrimitiveWithInfer): """ Computes addition of all input tensors element-wise. - All input tensors should have the same shape. + All input tensors must have the same shape. Inputs: - **input_x** (Union(tuple[Tensor], list[Tensor])) - The input tuple or list @@ -932,7 +932,7 @@ class Neg(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Neg""" + """Initialize Neg""" self.init_prim_io_names(inputs=['x'], outputs=['y']) def infer_shape(self, input_x): @@ -980,7 +980,7 @@ class InplaceAdd(PrimitiveWithInfer): @prim_attr_register def __init__(self, indices): - """init InplaceAdd""" + """Initialize InplaceAdd""" self.init_prim_io_names(inputs=['x', 'v'], outputs=['y']) self.indices = indices validator.check_value_type('indices', indices, [tuple, int], self.name) @@ -1038,7 +1038,7 @@ class InplaceSub(PrimitiveWithInfer): @prim_attr_register def __init__(self, indices): - """init InplaceSub""" + """Initialize InplaceSub""" self.init_prim_io_names(inputs=['x', 'v'], outputs=['y']) self.indices = indices validator.check_value_type('indices', indices, [tuple, int], self.name) @@ -1198,7 +1198,7 @@ class Square(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Square""" + """Initialize Square""" self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): @@ -1222,7 +1222,7 @@ class Rsqrt(PrimitiveWithInfer): Computes reciprocal of square root of input tensor element-wise. Inputs: - - **input_x** (Tensor) - The input of Rsqrt. Each element should be a non-negative number. + - **input_x** (Tensor) - The input of Rsqrt. Each element must be a non-negative number. Outputs: Tensor, has the same type and shape as `input_x`. 
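A minimal usage sketch of the non-negative input requirement noted for Rsqrt above (values and import paths are illustrative, taken from the public MindSpore API):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

# Every element of the input is non-negative; the op returns 1 / sqrt(x) element-wise.
x = Tensor(np.array([[4.0, 1.0], [0.25, 16.0]]).astype(np.float32))
rsqrt = P.Rsqrt()
output = rsqrt(x)   # [[0.5, 1.0], [2.0, 0.25]]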
@@ -1236,7 +1236,7 @@ class Rsqrt(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Rsqrt""" + """Initialize Rsqrt""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): @@ -1274,7 +1274,7 @@ class Sqrt(PrimitiveWithCheck): @prim_attr_register def __init__(self): - """init Sqrt""" + """Initialize Sqrt""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def check_dtype(self, x_type): @@ -1308,7 +1308,7 @@ class Reciprocal(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Reciprocal""" + """Initialize Reciprocal""" if context.get_context("device_target") == "GPU": self.target = "GPU" else: @@ -1395,7 +1395,7 @@ class Exp(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Exp""" + """Initialize Exp""" self.init_prim_io_names(inputs=['x'], outputs=['y']) def infer_shape(self, x_shape): @@ -1433,7 +1433,7 @@ class Expm1(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Exp""" + """Initialize Exp""" self.init_prim_io_names(inputs=['x'], outputs=['y']) def infer_shape(self, x_shape): @@ -1576,7 +1576,7 @@ class Erf(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Erf""" + """Initialize Erf""" self.init_prim_io_names(inputs=['x'], outputs=['y']) def infer_shape(self, x_shape): @@ -1606,7 +1606,7 @@ class Erfc(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Erfc""" + """Initialize Erfc""" self.init_prim_io_names(inputs=['x'], outputs=['y']) def infer_shape(self, x_shape): @@ -1750,7 +1750,7 @@ class Div(_MathBinaryOp): a bool or a tensor whose data type is number or bool. - **input_y** (Union[Tensor, Number, bool]) - When the first input is a tensor, The second input could be a number, a bool, or a tensor whose data type is number or bool. When the first input - is a number or a bool, the second input should be a tensor whose data type is number or bool. + is a number or a bool, the second input must be a tensor whose data type is number or bool. Outputs: Tensor, the shape is the same as the one after broadcasting, @@ -1923,7 +1923,7 @@ class Mod(_MathBinaryOp): - **input_x** (Union[Tensor, Number]) - The first input is a number or a tensor whose data type is number. - **input_y** (Union[Tensor, Number]) - When the first input is a tensor, The second input could be a number or a tensor whose data type is number. When the first input is a number, - the second input should be a tensor whose data type is number. + the second input must be a tensor whose data type is number. 
Outputs: Tensor, the shape is the same as the one after broadcasting, @@ -2121,7 +2121,7 @@ class Acosh(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Acosh""" + """Initialize Acosh""" def infer_shape(self, x_shape): return x_shape @@ -2150,7 +2150,7 @@ class Cosh(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Cosh""" + """Initialize Cosh""" def infer_shape(self, x_shape): return x_shape @@ -2179,7 +2179,7 @@ class Asinh(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Asinh""" + """Initialize Asinh""" def infer_shape(self, x_shape): return x_shape @@ -2208,7 +2208,7 @@ class Sinh(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Sinh""" + """Initialize Sinh""" def infer_shape(self, x_shape): return x_shape @@ -2297,7 +2297,7 @@ class ApproximateEqual(_LogicBinaryOp): @prim_attr_register def __init__(self, tolerance=1e-05): - """Init ApproximateEqual""" + """Initialize ApproximateEqual""" validator.check_value_type("tolerance", tolerance, [float], self.name) def infer_shape(self, x_shape, y_shape): @@ -2315,7 +2315,7 @@ class EqualCount(PrimitiveWithInfer): """ Computes the number of the same elements of two tensors. - The two input tensors should have the same data type and shape. + The two input tensors must have the same data type and shape. Inputs: - **input_x** (Tensor) - The first input tensor. @@ -2334,7 +2334,7 @@ class EqualCount(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init EqualCount""" + """Initialize EqualCount""" self.init_prim_io_names(inputs=['x', 'y'], outputs=['output']) def infer_shape(self, x_shape, y_shape): @@ -2550,7 +2550,7 @@ class LogicalNot(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init LogicalNot""" + """Initialize LogicalNot""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): @@ -2568,9 +2568,9 @@ class LogicalAnd(_LogicBinaryOp): Inputs of `input_x` and `input_y` comply with the implicit type conversion rules to make the data types consistent. The inputs must be two tensors or one tensor and one bool. When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be bool. + and the data types of them must be bool. When the inputs are one tensor and one bool, the bool object could only be a constant, - and the data type of the tensor should be bool. + and the data type of the tensor must be bool. Inputs: - **input_x** (Union[Tensor, bool]) - The first input is a bool or a tensor whose data type is bool. @@ -2599,9 +2599,9 @@ class LogicalOr(_LogicBinaryOp): Inputs of `input_x` and `input_y` comply with the implicit type conversion rules to make the data types consistent. The inputs must be two tensors or one tensor and one bool. When the inputs are two tensors, the shapes of them could be broadcast, - and the data types of them should be bool. + and the data types of them must be bool. When the inputs are one tensor and one bool, the bool object could only be a constant, - and the data type of the tensor should be bool. + and the data type of the tensor must be bool. Inputs: - **input_x** (Union[Tensor, bool]) - The first input is a bool or a tensor whose data type is bool. 
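As a minimal sketch of the bool-dtype requirement stated for LogicalAnd and LogicalOr above (values are illustrative only):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

# Both tensor inputs carry the bool data type; their shapes may broadcast.
x = Tensor(np.array([True, False, True]))
y = Tensor(np.array([True, True, False]))
out_and = P.LogicalAnd()(x, y)   # [True, False, False]
out_or = P.LogicalOr()(x, y)     # [True, True, True]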
@@ -2641,7 +2641,7 @@ class IsNan(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init IsNan""" + """Initialize IsNan""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): @@ -2669,7 +2669,7 @@ class IsInf(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init IsInf""" + """Initialize IsInf""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): @@ -2698,7 +2698,7 @@ class IsFinite(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init IsFinite""" + """Initialize IsFinite""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): @@ -2729,7 +2729,7 @@ class FloatStatus(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init FloatStatus""" + """Initialize FloatStatus""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, x_shape): @@ -2760,7 +2760,7 @@ class NPUAllocFloatStatus(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init NPUAllocFloatStatus""" + """Initialize NPUAllocFloatStatus""" self.add_prim_attr("_side_effect_flag", True) def infer_shape(self): @@ -2795,7 +2795,7 @@ class NPUGetFloatStatus(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init NPUGetFloatStatus""" + """Initialize NPUGetFloatStatus""" self.add_prim_attr("_side_effect_flag", True) def infer_shape(self, x_shape): @@ -2838,7 +2838,7 @@ class NPUClearFloatStatus(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init NPUClearFloatStatus""" + """Initialize NPUClearFloatStatus""" self.add_prim_attr("_side_effect_flag", True) def infer_shape(self, x_shape): @@ -2870,7 +2870,7 @@ class Cos(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Cos""" + """Initialize Cos""" def infer_shape(self, x_shape): return x_shape @@ -2898,7 +2898,7 @@ class ACos(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ACos""" + """Initialize ACos""" def infer_shape(self, x_shape): return x_shape @@ -2926,7 +2926,7 @@ class Sin(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init Sin.""" + """Initialize Sin.""" def infer_shape(self, x_shape): return x_shape @@ -2955,7 +2955,7 @@ class Asin(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Asin""" + """Initialize Asin""" def infer_shape(self, x_shape): return x_shape @@ -3006,7 +3006,7 @@ class NMSWithMask(PrimitiveWithInfer): @prim_attr_register def __init__(self, iou_threshold=0.5): - """Init NMSWithMask""" + """Initialize NMSWithMask""" validator.check_value_type("iou_threshold", iou_threshold, [float], self.name) self.init_prim_io_names(inputs=['bboxes'], outputs=['selected_boxes', 'selected_idx', 'selected_mask']) self.is_ge = context.get_context("enable_ge") @@ -3043,7 +3043,7 @@ class Abs(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Abs""" + """Initialize Abs""" self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): @@ -3115,7 +3115,7 @@ class Round(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Round""" + """Initialize Round""" self.init_prim_io_names(inputs=['input_x'], outputs=['output']) def infer_shape(self, x_shape): @@ -3131,7 +3131,7 @@ class Tan(PrimitiveWithInfer): Computes tangent of `input_x` element-wise. Inputs: - - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. 
Data type should be + - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. Data type must be float16, float32 or int32. Outputs: @@ -3145,7 +3145,7 @@ class Tan(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Tan""" + """Initialize Tan""" def infer_shape(self, x_shape): return x_shape @@ -3268,7 +3268,7 @@ class SquareSumAll(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init SquareSumAll""" + """Initialize SquareSumAll""" def infer_shape(self, x_shape, y_shape): validator.check("x1_shape", x_shape, "x2_shape", y_shape, Rel.EQ, self.name) @@ -3366,7 +3366,7 @@ class BesselI0e(PrimitiveWithInfer): - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. Outputs: - Tensor, has the same shape as `input_x`. Data type should be float16 or float32. + Tensor, has the same shape as `input_x`. Data type must be float16 or float32. Examples: >>> bessel_i0e = P.BesselI0e() @@ -3377,7 +3377,7 @@ class BesselI0e(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init BesselI0e""" + """Initialize BesselI0e""" def infer_shape(self, x): return x @@ -3395,7 +3395,7 @@ class BesselI1e(PrimitiveWithInfer): - **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`. Outputs: - Tensor, has the same shape as `input_x`. Data type should be float16 or float32. + Tensor, has the same shape as `input_x`. Data type must be float16 or float32. Examples: >>> bessel_i1e = P.BesselI1e() @@ -3406,7 +3406,7 @@ class BesselI1e(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init BesselI1e""" + """Initialize BesselI1e""" def infer_shape(self, x): return x @@ -3494,7 +3494,7 @@ class Eps(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Eps""" + """Initialize Eps""" self.init_prim_io_names(inputs=['input_x'], outputs=['y']) def __infer__(self, input_x): diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 67ac6b84fd..1c59328c74 100644 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -198,7 +198,7 @@ class Softplus(PrimitiveWithInfer): \text{output} = \log(1 + \exp(\text{input_x})), Inputs: - - **input_x** (Tensor) - The input tensor whose data type should be float. + - **input_x** (Tensor) - The input tensor whose data type must be float. Outputs: Tensor, with the same type and shape as the `input_x`. @@ -212,7 +212,7 @@ class Softplus(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Softplus""" + """Initialize Softplus""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, input_x): @@ -233,7 +233,7 @@ class Softsign(PrimitiveWithInfer): \text{output} = \frac{\text{input_x}}{1 + \left| \text{input_x} \right|}, Inputs: - - **input_x** (Tensor) - The input tensor whose data type should be float16 or float32. + - **input_x** (Tensor) - The input tensor whose data type must be float16 or float32. Outputs: Tensor, with the same type and shape as the `input_x`. 
@@ -247,7 +247,7 @@ class Softsign(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init Softsign""" + """Initialize Softsign""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, input_x): @@ -279,7 +279,7 @@ class ReLU(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ReLU""" + """Initialize ReLU""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, input_x): @@ -310,7 +310,7 @@ class ReLU6(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ReLU6""" + """Initialize ReLU6""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, input_x): @@ -328,7 +328,7 @@ class ReLUV2(PrimitiveWithInfer): It returns :math:`\max(x,\ 0)` element-wise. Inputs: - - **input_x** (Tensor) - The input tensor should be a 4-D tensor. + - **input_x** (Tensor) - The input tensor must be a 4-D tensor. Outputs: - **output** (Tensor) - Has the same type and shape as the `input_x`. @@ -344,7 +344,7 @@ class ReLUV2(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ReLUV2""" + """Initialize ReLUV2""" self.init_prim_io_names(inputs=['x'], outputs=['output', 'mask']) def __infer__(self, input_x): @@ -381,14 +381,14 @@ class ReLUV2(PrimitiveWithInfer): class Elu(PrimitiveWithInfer): r""" Computes exponential linear: `alpha * (exp(x) - 1)` if x < 0, `x` otherwise. - The data type of input tensor should be float. + The data type of input tensor must be float. Args: alpha (float): The coefficient of negative factor whose type is float, only support '1.0' currently. Default: 1.0. Inputs: - - **input_x** (Tensor) - The input tensor whose data type should be float. + - **input_x** (Tensor) - The input tensor whose data type must be float. Outputs: Tensor, has the same shape and data type as `input_x`. @@ -403,7 +403,7 @@ class Elu(PrimitiveWithInfer): @prim_attr_register def __init__(self, alpha=1.0): - """Init Elu""" + """Initialize Elu""" validator.check_value_type("alpha", alpha, [float], self.name) validator.check_number("alpha", alpha, 1.0, Rel.EQ, self.name) @@ -429,7 +429,7 @@ class HSwish(PrimitiveWithInfer): where :math:`x_{i}` is the :math:`i`-th slice in the given dimension of the input Tensor. Inputs: - - **input_data** (Tensor) - The input of HSwish, data type should be float16 or float32. + - **input_data** (Tensor) - The input of HSwish, data type must be float16 or float32. Outputs: Tensor, with the same type and shape as the `input_data`. @@ -464,7 +464,7 @@ class Sigmoid(PrimitiveWithInfer): where :math:`x_i` is the element of the input. Inputs: - - **input_x** (Tensor) - The input of Sigmoid, data type should be float16 or float32. + - **input_x** (Tensor) - The input of Sigmoid, data type must be float16 or float32. Outputs: Tensor, with the same type and shape as the input_x. @@ -502,7 +502,7 @@ class HSigmoid(PrimitiveWithInfer): where :math:`x_{i}` is the :math:`i`-th slice in the given dimension of the input Tensor. Inputs: - - **input_data** (Tensor) - The input of HSigmoid, data type should be float16 or float32. + - **input_data** (Tensor) - The input of HSigmoid, data type must be float16 or float32. Outputs: Tensor, with the same type and shape as the `input_data`. @@ -582,7 +582,7 @@ class FusedBatchNorm(Primitive): epsilon (float): A small value added for numerical stability. Default: 1e-5. momentum (float): The hyper parameter to compute moving average for running_mean and running_var (e.g. 
:math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`). - Momentum value should be [0, 1]. Default: 0.9. + Momentum value must be [0, 1]. Default: 0.9. Inputs: - **input_x** (Tensor) - Tensor of shape :math:`(N, C)`. @@ -643,7 +643,7 @@ class FusedBatchNormEx(PrimitiveWithInfer): epsilon (float): A small value added for numerical stability. Default: 1e-5. momentum (float): The hyper parameter to compute moving average for running_mean and running_var (e.g. :math:`new\_running\_mean = momentum * running\_mean + (1 - momentum) * current\_mean`). - Momentum value should be [0, 1]. Default: 0.9. + Momentum value must be [0, 1]. Default: 0.9. Inputs: - **input_x** (Tensor) - The input of FusedBatchNormEx, Tensor of shape :math:`(N, C)`, @@ -946,8 +946,8 @@ class Conv2D(PrimitiveWithInfer): top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers, the padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3] correspondingly. stride (Union(int, tuple[int])): The stride to be applied to the convolution filter. Default: 1. - dilation (Union(int, tuple[int])): Specify the space to use between kernel elements. Default: 1. - group (int): Split input into groups. Default: 1. + dilation (Union(int, tuple[int])): Specifies the space to use between kernel elements. Default: 1. + group (int): Splits input into groups. Default: 1. Returns: Tensor, the value that applied 2D convolution. @@ -977,7 +977,7 @@ class Conv2D(PrimitiveWithInfer): stride=1, dilation=1, group=1): - """init Conv2D""" + """Initialize Conv2D""" self.init_prim_io_names(inputs=['x', 'w'], outputs=['output']) self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) self.stride = _check_positive_int_or_tuple('stride', stride, self.name, allow_four=True, ret_four=True) @@ -1109,7 +1109,7 @@ class DepthwiseConv2dNative(PrimitiveWithInfer): stride=1, dilation=1, group=1): - """init DepthwiseConv2dNative""" + """Initialize DepthwiseConv2dNative""" self.init_prim_io_names(inputs=['x', 'w'], outputs=['output']) self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) self.stride = _check_positive_int_or_tuple('stride', stride, self.name) @@ -1198,9 +1198,9 @@ class _Pool(PrimitiveWithInfer): Performs max/avg pooling operation. Args: - ksize (Union[int, tuple[int]]): The size of the kernel, that should be a tuple + ksize (Union[int, tuple[int]]): The size of the kernel, that must be a tuple of two `int` for height and width. Default: 1. - strides (Union[int, tuple[int]]): The stride of the window, that should be + strides (Union[int, tuple[int]]): The stride of the window, that must be a tuple of two `int` for height and width. Default: 1. padding (str): The optional value for pad mode, is "same" or "valid", not case sensitive. Default: "valid". @@ -1338,7 +1338,7 @@ class MaxPoolWithArgmax(_Pool): Inputs: - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. - Data type should be float16 or float32. + Data type must be float16 or float32. Outputs: Tuple of 2 Tensors, representing the maxpool result and where the max values are generated. 
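A rough usage sketch of the pooling attributes (`ksize`, `strides`, `padding`) and the two outputs described for MaxPoolWithArgmax above (shapes and values assumed for illustration; availability depends on the target device):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

# ksize and strides accept an int or a pair of ints for height and width.
x = Tensor(np.random.rand(1, 3, 6, 6).astype(np.float32))   # float16 or float32 input
maxpool_argmax = P.MaxPoolWithArgmax(ksize=2, strides=2, padding="valid")
output, argmax = maxpool_argmax(x)   # output shape (1, 3, 3, 3)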
@@ -1501,7 +1501,7 @@ class Conv2DBackpropInput(PrimitiveWithInfer): stride=1, dilation=1, group=1): - """init Conv2DBackpropInput""" + """Initialize Conv2DBackpropInput""" self.init_prim_io_names(inputs=['out_backprop', 'filter', 'input_sizes'], outputs=['output']) self.out_channel = validator.check_integer('out_channel', out_channel, 0, Rel.GT, self.name) self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) @@ -1618,11 +1618,11 @@ class TopK(PrimitiveWithInfer): Finds values and indices of the `k` largest entries along the last dimension. Args: - sorted (bool): If True, the obtained elements will + sorted (bool): If true, the obtained elements will be sorted by the values in descending order. Default: False. Inputs: - - **input_x** (Tensor) - Input to be computed, data type should be float16, float32 or int32. + - **input_x** (Tensor) - Input to be computed, data type must be float16, float32 or int32. - **k** (int) - The number of top elements to be computed along the last dimension, constant input is needed. Outputs: @@ -1674,7 +1674,7 @@ class SoftmaxCrossEntropyWithLogits(PrimitiveWithInfer): loss_{ij} = -\sum_j{Y_{ij} * ln(p_{ij})} Inputs: - - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type should be float16 or float32. + - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32. - **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`, has the same data type with `logits`. Outputs: @@ -1725,9 +1725,9 @@ class SparseSoftmaxCrossEntropyWithLogits(PrimitiveWithInfer): is_grad (bool): If true, this operation returns the computed gradient. Default: False. Inputs: - - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type should be float16 or float32. + - **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32. - **labels** (Tensor) - Ground truth labels, with shape :math:`(N)`. - Data type should be int32 or int64. + Data type must be int32 or int64. Outputs: Tensor, if `is_grad` is False, the output tensor is the value of loss which is a scalar tensor; @@ -1770,18 +1770,19 @@ class ApplyMomentum(PrimitiveWithInfer): Data type conversion of Parameter is not supported. RuntimeError exception will be thrown. Args: - use_locking (bool): Enable a lock to protect the update of variable and accumlation tensors. Default: False. + use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors + from being updated. Default: False. use_nesterov (bool): Enable Nesterov momentum. Default: False. gradient_scale (float): The scale of the gradient. Default: 1.0. Inputs: - - **variable** (Parameter) - Weights to be updated. data type should be float. + - **variable** (Parameter) - Weights to be updated. Data type must be float. - **accumulation** (Parameter) - Accumulated gradient value by moment weight. Has the same data type with `variable`. - - **learning_rate** (Union[Number, Tensor]) - The learning rate value, should be a float number or + - **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float number or a scalar tensor with float data type. - - **gradient** (Tensor) - Gradients, has the same data type as `variable`. - - **momentum** (Union[Number, Tensor]) - Momentum, should be a float number or + - **gradient** (Tensor) - Gradient, has the same data type as `variable`.
+ - **momentum** (Union[Number, Tensor]) - Momentum, must be a float number or a scalar tensor with float data type. Outputs: @@ -1842,7 +1843,7 @@ class SmoothL1Loss(PrimitiveWithInfer): quadratic to linear. Default: 1.0. Inputs: - - **prediction** (Tensor) - Predict data. Data type should be float16 or float32. + - **prediction** (Tensor) - Predict data. Data type must be float16 or float32. - **target** (Tensor) - Ground truth data, with the same type and shape as `prediction`. Outputs: @@ -1884,7 +1885,7 @@ class L2Loss(PrimitiveWithInfer): :math:`nelement(x)` represents the number of `input_x`. Inputs: - - **input_x** (Tensor) - A input Tensor. Data type should be float16 or float32. + - **input_x** (Tensor) - An input Tensor. Data type must be float16 or float32. Outputs: Tensor, has the same dtype as `input_x`. The output tensor is the value of loss which is a scalar tensor. @@ -1898,7 +1899,7 @@ class L2Loss(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init L2Loss""" + """Initialize L2Loss""" def infer_shape(self, input_x): loss_shape = [] @@ -1958,7 +1959,7 @@ class RNNTLoss(PrimitiveWithInfer): blank_label (int): blank label. Default: 0. Inputs: - - **acts** (Tensor) - Tensor of shape :math:`(B, T, U, V)`. Data type should be float16 or float32. + - **acts** (Tensor) - Tensor of shape :math:`(B, T, U, V)`. Data type must be float16 or float32. - **labels** (Tensor[int32]) - Tensor of shape :math:`(B, U-1)`. - **input_lengths** (Tensor[int32]) - Tensor of shape :math:`(B,)`. - **label_lengths** (Tensor[int32]) - Tensor of shape :math:`(B,)`. @@ -2024,7 +2025,7 @@ class SGD(PrimitiveWithInfer): Inputs: - **parameters** (Tensor) - Parameters to be updated. With float16 or float32 data type. - - **gradient** (Tensor) - Gradients. With float16 or float32 data type. + - **gradient** (Tensor) - Gradient, with float16 or float32 data type. - **learning_rate** (Tensor) - Learning rate, a scalar tensor with float16 or float32 data type. e.g. Tensor(0.1, mindspore.float32) - **accum** (Tensor) - Accum(velocity) to be updated. With float16 or float32 data type. @@ -2102,15 +2103,16 @@ class ApplyRMSProp(PrimitiveWithInfer): :math:`\\eta` represents `learning_rate`. :math:`\\nabla Q_{i}(w)` represents `grad`. Args: - use_locking (bool): Enable a lock to protect the update of variable tensors. Default: False. + use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors + from being updated. Default: False. Inputs: - **var** (Tensor) - Weights to be update. - **mean_square** (Tensor) - Mean square gradients, must have the same type as `var`. - **moment** (Tensor) - Delta of `var`, must have the same type as `var`. - - **learning_rate** (Union[Number, Tensor]) - Learning rate. Should be a float number or + - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or a scalar tensor with float16 or float32 data type. - - **grad** (Tensor) - Gradients, must have the same type as `var`. + - **grad** (Tensor) - Gradient, must have the same type as `var`. - **decay** (float) - Decay rate. Only constant value is allowed. - **momentum** (float) - Momentum. Only constant value is allowed. - **epsilon** (float) - Ridge term. Only constant value is allowed. @@ -2197,15 +2199,16 @@ class ApplyCenteredRMSProp(PrimitiveWithInfer): :math:`\\eta` represents `learning_rate`. :math:`\\nabla Q_{i}(w)` represents `grad`. Args: - use_locking (bool): Enable a lock to protect the update of variable tensors. Default: False.
+ use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors + from being updated. Default: False. Inputs: - **var** (Tensor) - Weights to be update. - **mean_gradient** (Tensor) - Mean gradients, must have the same type as `var`. - **mean_square** (Tensor) - Mean square gradients, must have the same type as `var`. - **moment** (Tensor) - Delta of `var`, must have the same type as `var`. - - **grad** (Tensor) - Gradients, must have the same type as `var`. - - **learning_rate** (Union[Number, Tensor]) - Learning rate. Should be a float number or + - **grad** (Tensor) - Gradient, must have the same type as `var`. + - **learning_rate** (Union[Number, Tensor]) - Learning rate. Must be a float number or a scalar tensor with float16 or float32 data type. - **decay** (float) - Decay rate. - **momentum** (float) - Momentum. @@ -2280,9 +2283,9 @@ class LayerNorm(Primitive): Args: begin_norm_axis (int): The begin axis of the `input_x` to apply LayerNorm, - the value should be in [-1, rank(input)). Default: 1. + the value must be in [-1, rank(input)). Default: 1. begin_params_axis (int): The begin axis of the parameter input (`gamma`, `beta`) to - apply LayerNorm, the value should be in [-1, rank(input)). Default: 1. + apply LayerNorm, the value must be in [-1, rank(input)). Default: 1. epsilon (float): A value added to the denominator for numerical stability. Default: 1e-7. Inputs: @@ -2334,7 +2337,7 @@ class L2Normalize(PrimitiveWithInfer): epsilon (float): A small value added for numerical stability. Default: 1e-4. Inputs: - - **input_x** (Tensor) - Input to compute the normalization. Data type should be float16 or float32. + - **input_x** (Tensor) - Input to compute the normalization. Data type must be float16 or float32. Outputs: Tensor, with the same type and shape as the input. @@ -2472,8 +2475,8 @@ class ResizeBilinear(PrimitiveWithInfer): Args: size (tuple[int]): A tuple of 2 int elements `(new_height, new_width)`, the new size of the images. - align_corners (bool): If True, rescale input by `(new_height - 1) / (height - 1)`, - which exactly aligns the 4 corners of images and resized images. If False, + align_corners (bool): If true, rescale input by `(new_height - 1) / (height - 1)`, + which exactly aligns the 4 corners of images and resized images. If false, rescale by `new_height / height`. Default: False.
Inputs: @@ -2595,7 +2598,7 @@ class Gelu(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init GeLU""" + """Initialize GeLU""" self.init_prim_io_names(inputs=['x'], outputs=['output']) def infer_shape(self, input_x): @@ -2837,7 +2840,7 @@ class SigmoidCrossEntropyWithLogits(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """Init SigmoidCrossEntropyWithLogits""" + """Initialize SigmoidCrossEntropyWithLogits""" self.init_prim_io_names(inputs=['predict', 'target'], outputs=['loss']) def infer_shape(self, x_shape, y_shape): @@ -2879,7 +2882,7 @@ class Pad(PrimitiveWithInfer): @prim_attr_register def __init__(self, paddings): - """Init Pad""" + """Initialize Pad""" self.init_prim_io_names(inputs=['x'], outputs=['y']) if not isinstance(paddings, tuple): raise TypeError('Paddings must be tuple type.') @@ -2949,7 +2952,7 @@ class MirrorPad(PrimitiveWithInfer): @prim_attr_register def __init__(self, mode='REFLECT'): - """Init Pad""" + """Initialize MirrorPad""" validator.check_string('mode', mode, ['REFLECT', 'SYMMETRIC'], self.name) self.mode = mode self.set_const_input_indexes([1]) @@ -2990,14 +2993,14 @@ class ROIAlign(PrimitiveWithInfer): pooled_width (int): The output features' width. spatial_scale (float): A scaling factor that maps the raw image coordinates to the input feature map coordinates. Suppose the height of a RoI is `ori_h` in the raw image and `fea_h` in the - input feature map, the `spatial_scale` should be `fea_h / ori_h`. + input feature map, the `spatial_scale` must be `fea_h / ori_h`. sample_num (int): Number of sampling points. Default: 2. roi_end_mode (int): Number must be 0 or 1. Default: 1. Inputs: - - **features** (Tensor) - The input features, whose shape should be `(N, C, H, W)`. + - **features** (Tensor) - The input features, whose shape must be `(N, C, H, W)`. - **rois** (Tensor) - The shape is `(rois_n, 5)`. With data type of float16 or float32. - `rois_n` represents the number of RoI. The size of the second dimension should be `5` and the `5` colunms + `rois_n` represents the number of RoI. The size of the second dimension must be `5` and the `5` columns are `(image_index, top_left_x, top_left_y, bottom_right_x, bottom_right_y)`. `image_index` represents the index of image. `top_left_x` and `top_left_y` represent the `x, y` coordinates of the top left corner of corresponding RoI, respectively. `bottom_right_x` and `bottom_right_y` represent the `x, y` @@ -3016,7 +3019,7 @@ class ROIAlign(PrimitiveWithInfer): @prim_attr_register def __init__(self, pooled_height, pooled_width, spatial_scale, sample_num=2, roi_end_mode=1): - """init ROIAlign""" + """Initialize ROIAlign""" validator.check_value_type("pooled_height", pooled_height, [int], self.name) validator.check_value_type("pooled_width", pooled_width, [int], self.name) validator.check_value_type("spatial_scale", spatial_scale, [float], self.name) @@ -3080,7 +3083,7 @@ class Adam(PrimitiveWithInfer): - **beta1** (float) - The exponential decay rate for the 1st moment estimations. - **beta2** (float) - The exponential decay rate for the 2nd moment estimations. - **epsilon** (float) - Term added to the denominator to improve numerical stability. - - **gradient** (Tensor) - Gradients, has the same type as `var`. + - **gradient** (Tensor) - Gradient, has the same type as `var`. Outputs: Tuple of 3 Tensor, the updated parameters.
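As a worked instance of the `spatial_scale` relation noted for ROIAlign in the hunks above (numbers chosen only for illustration): for a raw image of height 224 whose feature map height is 14, spatial_scale = fea_h / ori_h = 14 / 224 = 0.0625, for example:

from mindspore.ops import operations as P

# 7x7 output bins; the feature map is 16x smaller than the raw image.
roi_align = P.ROIAlign(pooled_height=7, pooled_width=7,
                       spatial_scale=14.0 / 224.0, sample_num=2)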
@@ -3134,7 +3137,7 @@ class Adam(PrimitiveWithInfer): class FusedSparseAdam(PrimitiveWithInfer): r""" - Merge the duplicate value of the gradient and then update parameters by Adaptive Moment Estimation (Adam) + Merges the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam) algorithm. This operator is used when the gradient is sparse. The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization `_. @@ -3266,7 +3269,7 @@ class FusedSparseAdam(PrimitiveWithInfer): class FusedSparseLazyAdam(PrimitiveWithInfer): r""" - Merge the duplicate value of the gradient and then update parameters by Adaptive Moment Estimation (Adam) + Merges the duplicate value of the gradient and then updates parameters by Adaptive Moment Estimation (Adam) algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the original Adam algorithm, as only the current indices parameters will be updated. @@ -3400,7 +3403,7 @@ class FusedSparseLazyAdam(PrimitiveWithInfer): class FusedSparseFtrl(PrimitiveWithInfer): """ - Merge the duplicate value of the gradient and then update relevant entries according to the FTRL-proximal scheme. + Merges the duplicate value of the gradient and then updates relevant entries according to the FTRL-proximal scheme. All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. If they have different data types, lower priority data type will be converted to @@ -3413,7 +3416,7 @@ class FusedSparseFtrl(PrimitiveWithInfer): l2 (float): l2 regularization strength, must be greater than or equal to zero. lr_power (float): Learning rate power controls how the learning rate decreases during training, must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero. - use_locking (bool): Use locks for updating operation if True . Default: False. + use_locking (bool): Use locks for updating operation if true . Default: False. Inputs: - **var** (Parameter) - The variable to be updated. The data type must be float32. @@ -3495,7 +3498,7 @@ class FusedSparseFtrl(PrimitiveWithInfer): class FusedSparseProximalAdagrad(PrimitiveWithInfer): r""" - Merge the duplicate value of the gradient and then update relevant entries according to the proximal adagrad + Merges the duplicate value of the gradient and then updates relevant entries according to the proximal adagrad algorithm. .. math:: @@ -3608,7 +3611,7 @@ class KLDivLoss(PrimitiveWithInfer): Args: reduction (str): Specifies the reduction to be applied to the output. - Its value should be one of 'none', 'mean', 'sum'. Default: 'mean'. + Its value must be one of 'none', 'mean', 'sum'. Default: 'mean'. Inputs: - **input_x** (Tensor) - The input Tensor. The data type must be float32. @@ -3680,13 +3683,13 @@ class BinaryCrossEntropy(PrimitiveWithInfer): Args: reduction (str): Specifies the reduction to be applied to the output. - Its value should be one of 'none', 'mean', 'sum'. Default: 'mean'. + Its value must be one of 'none', 'mean', 'sum'. Default: 'mean'. Inputs: - - **input_x** (Tensor) - The input Tensor. The data type should be float16 or float32. + - **input_x** (Tensor) - The input Tensor. The data type must be float16 or float32. - **input_y** (Tensor) - The label Tensor which has same shape and data type as `input_x`. - **weight** (Tensor, optional) - A rescaling weight applied to the loss of each batch element. - And it should have same shape and data type as `input_x`. Default: None. 
+ And it must have same shape and data type as `input_x`. Default: None. Outputs: Tensor or Scalar, if `reduction` is 'none', then output is a tensor and has the same shape as `input_x`. @@ -3739,7 +3742,7 @@ class BinaryCrossEntropy(PrimitiveWithInfer): class ApplyAdaMax(PrimitiveWithInfer): r""" - Update relevant entries according to the adamax scheme. + Updates relevant entries according to the adamax scheme. The updating formulas are as follows, @@ -3769,15 +3772,15 @@ class ApplyAdaMax(PrimitiveWithInfer): With float32 or float16 data type. - **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients with the same shape and type as `var`. With float32 or float16 data type. - - **beta1_power** (Union[Number, Tensor]) - :math:`beta_1^t` in the updating formula, should be scalar. + - **beta1_power** (Union[Number, Tensor]) - :math:`beta_1^t` in the updating formula, must be scalar. With float32 or float16 data type. - - **lr** (Union[Number, Tensor]) - Learning rate, :math:`l` in the updating formula, should be scalar. + - **lr** (Union[Number, Tensor]) - Learning rate, :math:`l` in the updating formula, must be scalar. With float32 or float16 data type. - **beta1** (Union[Number, Tensor]) - The exponential decay rate for the 1st moment estimations, - should be scalar. With float32 or float16 data type. + must be scalar. With float32 or float16 data type. - **beta2** (Union[Number, Tensor]) - The exponential decay rate for the 2nd moment estimations, - should be scalar. With float32 or float16 data type. - - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, should be scalar. + must be scalar. With float32 or float16 data type. + - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be scalar. With float32 or float16 data type. - **grad** (Tensor) - A tensor for gradient, has the same shape and type as `var`. With float32 or float16 data type. @@ -3829,7 +3832,7 @@ class ApplyAdaMax(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ApplyAdaMax""" + """Initialize ApplyAdaMax""" def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, lr_shape, beta1_shape, beta2_shape, epsilon_shape, grad_shape): @@ -3873,7 +3876,7 @@ class ApplyAdaMax(PrimitiveWithInfer): class ApplyAdadelta(PrimitiveWithInfer): r""" - Update relevant entries according to the adadelta scheme. + Updates relevant entries according to the adadelta scheme. .. math:: accum = \rho * accum + (1 - \rho) * grad^2 @@ -3896,9 +3899,9 @@ class ApplyAdadelta(PrimitiveWithInfer): With float32 or float16 data type. - **accum_update** (Parameter) - Accum_update to be updated, has the same shape and type as `var`. With float32 or float16 data type. - - **lr** (Union[Number, Tensor]) - Learning rate, should be scalar. With float32 or float16 data type. - - **rho** (Union[Number, Tensor]) - Decay rate, should be scalar. With float32 or float16 data type. - - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, should be scalar. + - **lr** (Union[Number, Tensor]) - Learning rate, must be scalar. With float32 or float16 data type. + - **rho** (Union[Number, Tensor]) - Decay rate, must be scalar. With float32 or float16 data type. + - **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be scalar. With float32 or float16 data type. - **grad** (Tensor) - Gradients, has the same shape and type as `var`. With float32 or float16 data type. 
@@ -3945,7 +3948,7 @@ class ApplyAdadelta(PrimitiveWithInfer): @prim_attr_register def __init__(self): - """init ApplyAdadelta""" + """Initialize ApplyAdadelta""" def infer_shape(self, var_shape, accum_shape, accum_update_shape, lr_shape, rho_shape, epsilon_shape, grad_shape): @@ -3979,7 +3982,7 @@ class ApplyAdadelta(PrimitiveWithInfer): class ApplyAdagrad(PrimitiveWithInfer): r""" - Update relevant entries according to the adagrad scheme. + Updates relevant entries according to the adagrad scheme. .. math:: accum += grad * grad @@ -3997,10 +4000,10 @@ class ApplyAdagrad(PrimitiveWithInfer): Inputs: - **var** (Parameter) - Variable to be updated. With float32 or float16 data type. - - **accum** (Parameter) - Accumulation to be updated. The shape and dtype should be the same as `var`. + - **accum** (Parameter) - Accumulation to be updated. The shape and dtype must be the same as `var`. With float32 or float16 data type. - - **lr** (Union[Number, Tensor]) - The learning rate value, should be scalar. With float32 or float16 data type. - - **grad** (Tensor) - A tensor for gradient. The shape and dtype should be the same as `var`. + - **lr** (Union[Number, Tensor]) - The learning rate value, must be scalar. With float32 or float16 data type. + - **grad** (Tensor) - A tensor for gradient. The shape and dtype must be the same as `var`. With float32 or float16 data type. Outputs: @@ -4060,7 +4063,7 @@ class ApplyAdagrad(PrimitiveWithInfer): class ApplyAdagradV2(PrimitiveWithInfer): r""" - Update relevant entries according to the adagradv2 scheme. + Updates relevant entries according to the adagradv2 scheme. .. math:: accum += grad * grad @@ -4079,11 +4082,11 @@ class ApplyAdagradV2(PrimitiveWithInfer): Inputs: - **var** (Parameter) - Variable to be updated. With float16 or float32 data type. - - **accum** (Parameter) - Accumulation to be updated. The shape and dtype should be the same as `var`. + - **accum** (Parameter) - Accumulation to be updated. The shape and dtype must be the same as `var`. With float16 or float32 data type. - - **lr** (Union[Number, Tensor]) - The learning rate value, should be a float number or + - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or a scalar tensor with float16 or float32 data type. - - **grad** (Tensor) - A tensor for gradient. The shape and dtype should be the same as `var`. + - **grad** (Tensor) - A tensor for gradient. The shape and dtype must be the same as `var`. With float16 or float32 data type. Outputs: @@ -4143,7 +4146,7 @@ class ApplyAdagradV2(PrimitiveWithInfer): class SparseApplyAdagrad(PrimitiveWithInfer): r""" - Update relevant entries according to the adagrad scheme. + Updates relevant entries according to the adagrad scheme. .. math:: accum += grad * grad @@ -4164,7 +4167,7 @@ class SparseApplyAdagrad(PrimitiveWithInfer): Inputs: - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32. - - **accum** (Parameter) - Accumulation to be updated. The shape and data type should be the same as `var`. + - **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`. - **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape except the first dimension. Gradients has the same data type as `var`. - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. 
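A rough sketch (learning rate, shapes and values assumed for illustration) of how the row-wise `indices` described above select which slices of `var` and `accum` the sparse update touches:

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P

class SparseApplyAdagradNet(nn.Cell):
    def __init__(self):
        super(SparseApplyAdagradNet, self).__init__()
        self.sparse_apply_adagrad = P.SparseApplyAdagrad(lr=0.01)
        self.var = Parameter(Tensor(np.ones((3, 2)).astype(np.float32)), name="var")
        self.accum = Parameter(Tensor(np.ones((3, 2)).astype(np.float32)), name="accum")

    def construct(self, grad, indices):
        return self.sparse_apply_adagrad(self.var, self.accum, grad, indices)

# grad matches var's shape except for the first dimension, which equals the
# length of indices; only rows 0 and 2 of var/accum are updated here.
grad = Tensor(np.full((2, 2), 0.1).astype(np.float32))
indices = Tensor(np.array([0, 2]).astype(np.int32))
output = SparseApplyAdagradNet()(grad, indices)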
@@ -4229,7 +4232,7 @@ class SparseApplyAdagrad(PrimitiveWithInfer): class SparseApplyAdagradV2(PrimitiveWithInfer): r""" - Update relevant entries according to the adagrad scheme. + Updates relevant entries according to the adagrad scheme. .. math:: accum += grad * grad @@ -4251,7 +4254,7 @@ class SparseApplyAdagradV2(PrimitiveWithInfer): Inputs: - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32. - - **accum** (Parameter) - Accumulation to be updated. The shape and data type should be the same as `var`. + - **accum** (Parameter) - Accumulation to be updated. The shape and data type must be the same as `var`. - **grad** (Tensor) - Gradient. The shape must be the same as `var`'s shape except the first dimension. Gradients has the same data type as `var`. - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`. @@ -4317,7 +4320,7 @@ class SparseApplyAdagradV2(PrimitiveWithInfer): class ApplyProximalAdagrad(PrimitiveWithInfer): r""" - Update relevant entries according to the proximal adagrad algorithm. + Updates relevant entries according to the proximal adagrad algorithm. .. math:: accum += grad * grad @@ -4337,13 +4340,13 @@ class ApplyProximalAdagrad(PrimitiveWithInfer): Default: False. Inputs: - - **var** (Parameter) - Variable to be updated. The data type should be float16 or float32. + - **var** (Parameter) - Variable to be updated. The data type must be float16 or float32. - **accum** (Parameter) - Accumulation to be updated. Must has the same shape and dtype as `var`. - - **lr** (Union[Number, Tensor]) - The learning rate value, should be scalar. The data type should be + - **lr** (Union[Number, Tensor]) - The learning rate value, must be scalar. The data type must be float16 or float32. - - **l1** (Union[Number, Tensor]) - l1 regularization strength, should be scalar. The data type should be + - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be scalar. The data type must be float16 or float32. - - **l2** (Union[Number, Tensor]) - l2 regularization strength, should be scalar. The data type should be + - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be scalar. The data type must be float16 or float32. - **grad** (Tensor) - Gradient with the same shape and dtype as `var`. @@ -4419,7 +4422,7 @@ class ApplyProximalAdagrad(PrimitiveWithInfer): class SparseApplyProximalAdagrad(PrimitiveWithCheck): r""" - Update relevant entries according to the proximal adagrad algorithm. Compared with ApplyProximalAdagrad, + Updates relevant entries according to the proximal adagrad algorithm. Compared with ApplyProximalAdagrad, an additional index tensor is input. .. math:: @@ -4442,11 +4445,11 @@ class SparseApplyProximalAdagrad(PrimitiveWithCheck): Inputs: - **var** (Parameter) - Variable tensor to be updated. The data type must be float16 or float32. - **accum** (Parameter) - Variable tensor to be updated, has the same dtype as `var`. - - **lr** (Union[Number, Tensor]) - The learning rate value, should be a float number or + - **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or a scalar tensor with float16 or float32 data type. - - **l1** (Union[Number, Tensor]) - l1 regularization strength, should be a float number or + - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or a scalar tensor with float16 or float32 data type. 
- - **l2** (Union[Number, Tensor]) - l2 regularization strength, should be a float number or + - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be a float number or a scalar tensor with float16 or float32 data type.. - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient. - **indices** (Tensor) - A vector of indices in the first dimension of `var` and `accum`. @@ -4513,7 +4516,7 @@ class SparseApplyProximalAdagrad(PrimitiveWithCheck): class ApplyAddSign(PrimitiveWithInfer): r""" - Update relevant entries according to the AddSign algorithm. + Updates relevant entries according to the AddSign algorithm. .. math:: \begin{array}{ll} \\ @@ -4534,11 +4537,11 @@ class ApplyAddSign(PrimitiveWithInfer): Inputs: - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type. - **m** (Parameter) - Variable tensor to be updated, has the same dtype as `var`. - - **lr** (Union[Number, Tensor]) - The learning rate value, should be a scalar. + - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float32 or float16 data type. - - **alpha** (Union[Number, Tensor]) - Should be a scalar. With float32 or float16 data type. - - **sign_decay** (Union[Number, Tensor]) - Should be a scalar. With float32 or float16 data type. - - **beta** (Union[Number, Tensor]) - The exponential decay rate, should be a scalar. + - **alpha** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type. + - **sign_decay** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type. + - **beta** (Union[Number, Tensor]) - The exponential decay rate, must be a scalar. With float32 or float16 data type. - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient. @@ -4583,7 +4586,7 @@ class ApplyAddSign(PrimitiveWithInfer): @prim_attr_register def __init__(self): - "init ApplyAddSign" + "Initialize ApplyAddSign" def infer_shape(self, var_shape, m_shape, lr_shape, alpha_shape, sign_decay_shape, beta_shape, grad_shape): validator.check('m_shape', m_shape, 'var_shape', var_shape, Rel.EQ, self.name) @@ -4619,7 +4622,7 @@ class ApplyAddSign(PrimitiveWithInfer): class ApplyPowerSign(PrimitiveWithInfer): r""" - Update relevant entries according to the AddSign algorithm. + Updates relevant entries according to the AddSign algorithm. .. math:: \begin{array}{ll} \\ @@ -4642,11 +4645,11 @@ class ApplyPowerSign(PrimitiveWithInfer): - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type. If data type of `var` is float16, all inputs must have the same data type as `var`. - **m** (Parameter) - Variable tensor to be updated, has the same dtype as `var`. - - **lr** (Union[Number, Tensor]) - The learning rate value, should be a scalar. + - **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float32 or float16 data type. - - **logbase** (Union[Number, Tensor]) - Should be a scalar. With float32 or float16 data type. - - **sign_decay** (Union[Number, Tensor]) - Should be a scalar. With float32 or float16 data type. - - **beta** (Union[Number, Tensor]) - The exponential decay rate, should be a scalar. + - **logbase** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type. + - **sign_decay** (Union[Number, Tensor]) - Must be a scalar. With float32 or float16 data type. + - **beta** (Union[Number, Tensor]) - The exponential decay rate, must be a scalar. With float32 or float16 data type. 
        - **grad** (Tensor) - A tensor of the same type as `var`, for the gradient.
@@ -4692,7 +4695,7 @@ class ApplyPowerSign(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self):
-        "init ApplyPowerSign"
+        "Initialize ApplyPowerSign"

     def infer_shape(self, var_shape, m_shape, lr_shape, logbase_shape, sign_decay_shape, beta_shape, grad_shape):
         validator.check('m_shape', m_shape, 'var_shape', var_shape, Rel.EQ, self.name)
@@ -4728,7 +4731,7 @@ class ApplyPowerSign(PrimitiveWithInfer):

 class ApplyGradientDescent(PrimitiveWithInfer):
     r"""
-    Update relevant entries according to the following formula.
+    Updates relevant entries according to the following formula.

     .. math::
         var = var - \alpha * \delta
@@ -4740,7 +4743,7 @@ class ApplyGradientDescent(PrimitiveWithInfer):

     Inputs:
         - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
-        - **alpha** (Union[Number, Tensor]) - Scaling factor, should be a scalar. With float32 or float16 data type.
+        - **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
         - **delta** (Tensor) - A tensor for the change, has the same type as `var`.

     Outputs:
@@ -4773,7 +4776,7 @@ class ApplyGradientDescent(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self):
-        "init ApplyGradientDescent"
+        "Initialize ApplyGradientDescent"

     def infer_shape(self, var_shape, alpha_shape, delta_shape):
         validator.check('delta shape', delta_shape, 'var shape', var_shape, Rel.EQ, self.name)
@@ -4793,7 +4796,7 @@ class ApplyGradientDescent(PrimitiveWithInfer):

 class ApplyProximalGradientDescent(PrimitiveWithInfer):
     r"""
-    Update relevant entries according to the FOBOS(Forward Backward Splitting) algorithm.
+    Updates relevant entries according to the FOBOS (Forward Backward Splitting) algorithm.

     .. math::
         \text{prox_v} = var - \alpha * \delta
@@ -4807,10 +4810,10 @@ class ApplyProximalGradientDescent(PrimitiveWithInfer):

     Inputs:
         - **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
-        - **alpha** (Union[Number, Tensor]) - Saling factor, should be a scalar. With float32 or float16 data type.
-        - **l1** (Union[Number, Tensor]) - l1 regularization strength, should be scalar.
+        - **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
+        - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be scalar.
           With float32 or float16 data type.
-        - **l2** (Union[Number, Tensor]) - l2 regularization strength, should be scalar.
+        - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be scalar.
           With float32 or float16 data type.
         - **delta** (Tensor) - A tensor for the change, has the same type as `var`.

@@ -4848,7 +4851,7 @@ class ApplyProximalGradientDescent(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self):
-        "init ApplyGradientDescent"
+        "Initialize ApplyProximalGradientDescent"

     def infer_shape(self, var_shape, alpha_shape, l1_shape, l2_shape, delta_shape):
         validator.check('delta shape', delta_shape, 'var shape', var_shape, Rel.EQ, self.name)
@@ -4878,7 +4881,7 @@ class ApplyProximalGradientDescent(PrimitiveWithInfer):

 class LARSUpdate(PrimitiveWithInfer):
     """
-    Conduct lars (layer-wise adaptive rate scaling) update on the sum of squares of gradient.
+    Conducts LARS (layer-wise adaptive rate scaling) update on the sum of squares of gradient.

     Args:
         epsilon (float): Term added to the denominator to improve numerical stability. Default: 1e-05.
@@ -4890,8 +4893,8 @@ class LARSUpdate(PrimitiveWithInfer): - **gradient** (Tensor) - The gradient of weight, which has the same shape and dtype with weight. - **norm_weight** (Tensor) - A scalar tensor, representing the sum of squares of weight. - **norm_gradient** (Tensor) - A scalar tensor, representing the sum of squares of gradient. - - **weight_decay** (Union[Number, Tensor]) - Weight decay. It should be a scalar tensor or number. - - **learning_rate** (Union[Number, Tensor]) - Learning rate. It should be a scalar tensor or number. + - **weight_decay** (Union[Number, Tensor]) - Weight decay. It must be a scalar tensor or number. + - **learning_rate** (Union[Number, Tensor]) - Learning rate. It must be a scalar tensor or number. Outputs: Tensor, represents the new gradient. @@ -4954,25 +4957,25 @@ class LARSUpdate(PrimitiveWithInfer): class ApplyFtrl(PrimitiveWithInfer): """ - Update relevant entries according to the FTRL scheme. + Updates relevant entries according to the FTRL scheme. Args: - use_locking (bool): Use locks for updating operation if True . Default: False. + use_locking (bool): Use locks for updating operation if true . Default: False. Inputs: - - **var** (Parameter) - The variable to be updated. The data type should be float16 or float32. + - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32. - **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`. - **linear** (Parameter) - the linear coefficient to be updated, must be same type and shape as `var`. - - **grad** (Tensor) - Gradient. The data type should be float16 or float32. + - **grad** (Tensor) - Gradient. The data type must be float16 or float32. - **lr** (Union[Number, Tensor]) - The learning rate value, must be positive. Default: 0.001. - It should be a float number or a scalar tensor with float16 or float32 data type. + It must be a float number or a scalar tensor with float16 or float32 data type. - **l1** (Union[Number, Tensor]) - l1 regularization strength, must be greater than or equal to zero. - Default: 0.0. It should be a float number or a scalar tensor with float16 or float32 data type. + Default: 0.0. It must be a float number or a scalar tensor with float16 or float32 data type. - **l2** (Union[Number, Tensor]) - l2 regularization strength, must be greater than or equal to zero. - Default: 0.0. It should be a float number or a scalar tensor with float16 or float32 data type. + Default: 0.0. It must be a float number or a scalar tensor with float16 or float32 data type. - **lr_power** (Union[Number, Tensor]) - Learning rate power controls how the learning rate decreases during training, must be less than or equal to zero. Use fixed learning rate if lr_power is zero. - Default: -0.5. It should be a float number or a scalar tensor with float16 or float32 data type. + Default: -0.5. It must be a float number or a scalar tensor with float16 or float32 data type. Outputs: Tensor, represents the updated `var`. @@ -5040,7 +5043,7 @@ class ApplyFtrl(PrimitiveWithInfer): class SparseApplyFtrl(PrimitiveWithCheck): """ - Update relevant entries according to the FTRL-proximal scheme. + Updates relevant entries according to the FTRL-proximal scheme. All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. 
If they have different data types, lower priority data type will be converted to @@ -5053,7 +5056,7 @@ class SparseApplyFtrl(PrimitiveWithCheck): l2 (float): l2 regularization strength, must be greater than or equal to zero. lr_power (float): Learning rate power controls how the learning rate decreases during training, must be less than or equal to zero. Use fixed learning rate if `lr_power` is zero. - use_locking (bool): Use locks for updating operation if True . Default: False. + use_locking (bool): Use locks for updating operation if true . Default: False. Inputs: - **var** (Parameter) - The variable to be updated. The data type must be float16 or float32. @@ -5130,7 +5133,7 @@ class SparseApplyFtrl(PrimitiveWithCheck): class SparseApplyFtrlV2(PrimitiveWithInfer): """ - Update relevant entries according to the FTRL-proximal scheme. + Updates relevant entries according to the FTRL-proximal scheme. All of inputs except `indices` comply with the implicit type conversion rules to make the data types consistent. If they have different data types, lower priority data type will be converted to @@ -5272,12 +5275,12 @@ class CTCLoss(PrimitiveWithInfer): ctc_merge_repeated (bool): If false, during CTC calculation, repeated non-blank labels will not be merged and these labels will be interpreted as individual ones. This is a simplfied version of CTC. Default: True. - ignore_longer_outputs_than_inputs (bool): If True, sequences with longer outputs than inputs will be ignored. + ignore_longer_outputs_than_inputs (bool): If true, sequences with longer outputs than inputs will be ignored. Default: False. Inputs: - - **inputs** (Tensor) - The input Tensor should be a `3-D` tensor whose shape is - :math:`(max_time, batch_size, num_classes)`. `num_classes` should be `num_labels + 1` classes, `num_labels` + - **inputs** (Tensor) - The input Tensor must be a `3-D` tensor whose shape is + :math:`(max_time, batch_size, num_classes)`. `num_classes` must be `num_labels + 1` classes, `num_labels` indicates the number of actual labels. Blank labels are reserved. Default blank label is `num_classes - 1`. Data type must be float16, float32 or float64. - **labels_indices** (Tensor) - The indices of labels. `labels_indices[i, :] == [b, t]` means `labels_values[i]` @@ -5285,7 +5288,7 @@ class CTCLoss(PrimitiveWithInfer): - **labels_values** (Tensor) - A `1-D` input tensor. The values are associated with the given batch and time. The type must be int32. `labels_values[i]` must in the range of `[0, num_classes)`. - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch_size)`. - The type must be int32. Each value in the tensor should not be greater than `max_time`. + The type must be int32. Each value in the tensor must not be greater than `max_time`. Outputs: - **loss** (Tensor) - A tensor containing log-probabilities, the shape is :math:`(batch_size)`. The tensor has @@ -5342,15 +5345,15 @@ class CTCGreedyDecoder(PrimitiveWithInfer): Performs greedy decoding on the logits given in inputs. Args: - merge_repeated (bool): If True, merge repeated classes in output. Default: True. + merge_repeated (bool): If true, merge repeated classes in output. Default: True. Inputs: - - **inputs** (Tensor) - The input Tensor should be a `3-D` tensor whose shape is - :math:`(max_time, batch_size, num_classes)`. 
`num_classes` should be `num_labels + 1` classes, `num_labels`
+        - **inputs** (Tensor) - The input Tensor must be a `3-D` tensor whose shape is
+          :math:`(max_time, batch_size, num_classes)`. `num_classes` must be `num_labels + 1` classes, `num_labels`
           indicates the number of actual labels. Blank labels are reserved. Default blank label is `num_classes - 1`.
           Data type must be float32 or float64.
         - **sequence_length** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch_size)`.
-          The type must be int32. Each value in the tensor should not greater than `max_time`.
+          The type must be int32. Each value in the tensor must not be greater than `max_time`.

     Outputs:
         - **decoded_indices** (Tensor) - A tensor with shape of :math:`(total_decoded_outputs, 2)`.
@@ -5516,7 +5519,7 @@ class InTopK(PrimitiveWithInfer):
     Whether the targets are in the top `k` predictions.

     Args:
-        k (int): Specify the number of top elements to be used for computing precision.
+        k (int): Specifies the number of top elements to be used for computing precision.

     Inputs:
         - **x1** (Tensor) - A 2D Tensor defines the predictions of a batch of samples with float16 or float32 data type.
@@ -5538,7 +5541,7 @@ class InTopK(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self, k):
-        """Init InTopK"""
+        """Initialize InTopK"""
         self.init_prim_io_names(inputs=['x1', 'x2', 'k'], outputs=['y'])
         validator.check_value_type("k", k, [int], self.name)

@@ -5564,7 +5567,7 @@ class LRN(PrimitiveWithInfer):
         bias (float): An offset (usually positive to avoid dividing by 0).
         alpha (float): A scale factor, usually positive.
         beta (float): An exponent.
-        norm_region (str): Specify normalization region. Options: "ACROSS_CHANNELS". Default: "ACROSS_CHANNELS".
+        norm_region (str): Specifies normalization region. Options: "ACROSS_CHANNELS". Default: "ACROSS_CHANNELS".

     Inputs:
         - **x** (Tensor) - A 4D Tensor with float16 or float32 data type.
@@ -5579,7 +5582,7 @@ class LRN(PrimitiveWithInfer):
     """
     @prim_attr_register
     def __init__(self, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CHANNELS"):
-        """Init LRN"""
+        """Initialize LRN"""
         self.init_prim_io_names(inputs=['x'], outputs=['y'])
         validator.check_value_type("depth_radius", depth_radius, [int], self.name)
         validator.check_value_type("bias", bias, [float], self.name)
@@ -5605,18 +5608,18 @@ class CTCLossV2(PrimitiveWithInfer):
     - Cudnn Uses label value of for the `blank`

     Inputs:
-        - **inputs** (Tensor) - The input Tensor should be a `3-D` tensor whose shape is
-          :math:`(max_time, batch_size, num_class)`. `num_class` should be `num_labels + 1` classes, `num_labels`
+        - **inputs** (Tensor) - The input Tensor must be a `3-D` tensor whose shape is
+          :math:`(max_time, batch_size, num_class)`. `num_class` must be `num_labels + 1` classes, `num_labels`
           indicates the number of actual labels. Blank labels are reserved.
-        - **labels** (Tensor) - The labels Tensor should be a `1-D` tensor whose shape is
+        - **labels** (Tensor) - The labels Tensor must be a `1-D` tensor whose shape is
           :math:`(\sigma{label_lengths})` or `2-D` tensor whose shape is :math:`(max_time, max{label_lengths})`
           The type must be int32.
         - **input_lengths** (Tensor) - A `1-D` input tensor whose shape is
-          :math:`(batch_size,)`. The values should be batch. The type must be int32.
+          :math:`(batch_size,)`. The values must be batch. The type must be int32.
         - **label_lengths** (Tensor) - A tensor containing sequence lengths with the shape of :math:`(batch_size)`.
-          The type must be int32. Each value in the tensor should not greater than `max_time`.
+          The type must be int32. Each value in the tensor must not be greater than `max_time`.

     Outputs:
         - **loss** (Tensor) - A tensor containing log-probabilities, the shape is :math:`(batch_size)`, has the same
diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py
index 50712336f1..47bf0d84f0 100644
--- a/mindspore/ops/operations/other_ops.py
+++ b/mindspore/ops/operations/other_ops.py
@@ -23,7 +23,7 @@ from ..primitive import Primitive, PrimitiveWithCheck, PrimitiveWithInfer, prim_

 class Assign(PrimitiveWithCheck):
     """
-    Assign `Parameter` with a value.
+    Assigns `Parameter` with a value.

     Inputs of `variable` and `value` comply with the implicit type conversion rules to make the data types consistent.
     If they have different data types, lower priority data type will be converted to
@@ -67,7 +67,7 @@ class Assign(PrimitiveWithCheck):

 class BoundingBoxEncode(PrimitiveWithInfer):
     """
-    Encode bounding boxes locations.
+    Encodes bounding box locations.

     Args:
         means (tuple): Means for encoding bounding boxes calculation. Default: (0.0, 0.0, 0.0, 0.0).
@@ -118,7 +118,7 @@ class BoundingBoxEncode(PrimitiveWithInfer):

 class BoundingBoxDecode(PrimitiveWithInfer):
     """
-    Decode bounding boxes locations.
+    Decodes bounding box locations.

     Args:
         means (tuple): The means of deltas calculation. Default: (0.0, 0.0, 0.0, 0.0).
@@ -175,14 +175,14 @@ class BoundingBoxDecode(PrimitiveWithInfer):

 class CheckValid(PrimitiveWithInfer):
     """
-    Check bounding box.
+    Checks bounding box.

-    Check whether the bounding box cross data and data border are valid.
+    Checks whether the bounding box cross data and data border are valid.

     Inputs:
-        - **bboxes** (Tensor) - Bounding boxes tensor with shape (N, 4). Data type should be float16 or float32.
+        - **bboxes** (Tensor) - Bounding boxes tensor with shape (N, 4). Data type must be float16 or float32.
         - **img_metas** (Tensor) - Raw image size information with the format of (height, width, ratio).
-          Data type should be float16 or float32.
+          Data type must be float16 or float32.

     Outputs:
         Tensor, the valided tensor.
@@ -228,9 +228,9 @@ class CheckValid(PrimitiveWithInfer):

 class IOU(PrimitiveWithInfer):
     r"""
-    Calculate intersection over union for boxes.
+    Calculates intersection over union for boxes.

-    Compute the intersection over union (IOU) or the intersection over foreground (IOF) based on the ground-truth and
+    Computes the intersection over union (IOU) or the intersection over foreground (IOF) based on the ground-truth and
     predicted regions.

     .. math::
@@ -288,7 +288,7 @@ class IOU(PrimitiveWithInfer):

 class MakeRefKey(Primitive):
     """
-    Make a RefKey instance by string. RefKey stores the name of Parameter, can be passed through the functions,
+    Makes a RefKey instance by string. RefKey stores the name of Parameter, can be passed through the functions,
     and used for Assign target.

     Args:
@@ -328,7 +328,7 @@ class MakeRefKey(Primitive):

 class Partial(Primitive):
     """
-    Make a partial function instance, used for pynative mode.
+    Makes a partial function instance, used for pynative mode.

     Inputs:
         - **args** (Union[FunctionType, Tensor]) - The function and bind arguments.
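A minimal usage sketch for the IOU primitive documented above; the `P` alias for `mindspore.ops.operations`, the box shapes, and the random values are illustrative assumptions rather than fixed requirements:

>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor
>>> from mindspore.ops import operations as P
>>> iou = P.IOU()  # default mode computes intersection over union
>>> anchor_boxes = Tensor(np.random.randint(1, 5, [3, 4]), mindspore.float16)  # predicted regions, shape (N, 4)
>>> gt_boxes = Tensor(np.random.randint(1, 5, [3, 4]), mindspore.float16)      # ground-truth regions, shape (M, 4)
>>> overlaps = iou(anchor_boxes, gt_boxes)  # one overlap value per (ground-truth box, anchor box) pair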
@@ -390,7 +390,7 @@ class CheckBprop(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self, prim_to_check=""):
-        """init CheckBprop"""
+        """Initialize CheckBprop"""
         self.prim_to_check = prim_to_check

     def infer_shape(self, xshapes, yshapes):
@@ -437,7 +437,7 @@ class CheckBprop(PrimitiveWithInfer):

 class ConfusionMatrix(PrimitiveWithInfer):
     r"""
-    Calculate the confusion matrix from labels and predictions.
+    Calculates the confusion matrix from labels and predictions.

     Args:
         num_classes (int): The num of classes.
@@ -484,10 +484,10 @@ class ConfusionMatrix(PrimitiveWithInfer):

 class PopulationCount(PrimitiveWithInfer):
     r"""
-    Calculate population count.
+    Calculates population count.

     Inputs:
-        - **input** (Tensor) - The data type should be int16 or uint16.
+        - **input** (Tensor) - The data type must be int16 or uint16.

     Outputs:
         Tensor, with the sam shape as the input.
@@ -512,7 +512,7 @@ class PopulationCount(PrimitiveWithInfer):

 class Push(PrimitiveWithInfer):
     """
-    Pushing the inputs of the corresponding optimizer to parameter server.
+    Pushes the inputs of the corresponding optimizer to the parameter server.

     Args:
         optim_type (string): The optimizer type. Default: 'ApplyMomentum'.
@@ -529,7 +529,7 @@ class Push(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self, optim_type='ApplyMomentum', only_shape_indices=None):
-        """init Push"""
+        """Initialize Push"""
         self.add_prim_attr("primitive_target", "CPU")
         self.add_prim_attr("_side_effect", True)
         self.init_prim_io_names(inputs=['optim_inputs', 'optim_input_shapes'], outputs=['key'])
@@ -542,7 +542,7 @@ class Push(PrimitiveWithInfer):

 class Pull(PrimitiveWithInfer):
     """
-    Pulling weight from parameter server.
+    Pulls the weight from the parameter server.

     Inputs:
         - **key** (Tensor) - The key of the weight.
@@ -554,7 +554,7 @@ class Pull(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self):
-        """init Pull"""
+        """Initialize Pull"""
         self.add_prim_attr("primitive_target", "CPU")
         self.init_prim_io_names(inputs=['key', 'weight'], outputs=['output'])

@@ -566,7 +566,7 @@ class Pull(PrimitiveWithInfer):

 class identity(Primitive):
     """
-    Make a identify primitive, used for pynative mode.
+    Makes an identity primitive, used for pynative mode.

     Inputs:
         - **x** (Any) - identity input value.
diff --git a/mindspore/ops/operations/random_ops.py b/mindspore/ops/operations/random_ops.py
index b0bd7aa933..20eb74f908 100644
--- a/mindspore/ops/operations/random_ops.py
+++ b/mindspore/ops/operations/random_ops.py
@@ -27,8 +27,8 @@ class StandardNormal(PrimitiveWithInfer):
     Generates random numbers according to the standard Normal (or Gaussian) random number distribution.

     Args:
-        seed (int): Random seed. Must be non-negative. Default: 0.
-        seed2 (int): Random seed2. Must be non-negative. Default: 0.
+        seed (int): Random seed, must be non-negative. Default: 0.
+        seed2 (int): Random seed2, must be non-negative. Default: 0.

     Inputs:
         - **shape** (tuple) - The shape of random tensor to be generated. Only constant value is allowed.
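A minimal usage sketch for the StandardNormal primitive described above; the `P` alias for `mindspore.ops.operations` and the concrete shape and seed are illustrative assumptions:

>>> from mindspore.ops import operations as P
>>> shape = (4, 16)
>>> stdnormal = P.StandardNormal(seed=2)
>>> output = stdnormal(shape)  # float32 tensor of shape (4, 16), samples drawn from N(0, 1)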
@@ -44,7 +44,7 @@ class StandardNormal(PrimitiveWithInfer): @prim_attr_register def __init__(self, seed=0, seed2=0): - """Init StandardNormal""" + """Initialize StandardNormal""" self.init_prim_io_names(inputs=['shape'], outputs=['output']) validator.check_integer("seed", seed, 0, Rel.GE, self.name) validator.check_integer("seed2", seed2, 0, Rel.GE, self.name) @@ -89,7 +89,7 @@ class StandardLaplace(PrimitiveWithInfer): @prim_attr_register def __init__(self, seed=0, seed2=0): - """Init StandardLaplace""" + """Initialize StandardLaplace""" self.init_prim_io_names(inputs=['shape'], outputs=['output']) validator.check_value_type('seed', seed, [int], self.name) validator.check_value_type('seed2', seed2, [int], self.name) @@ -117,18 +117,18 @@ class Gamma(PrimitiveWithInfer): \text{P}(x|α,β) = \frac{\exp(-x/β)}{{β^α}\cdot{\Gamma(α)}}\cdot{x^{α-1}}, Args: - seed (int): Random seed. Must be non-negative. Default: 0. - seed2 (int): Random seed2. Must be non-negative. Default: 0. + seed (int): Random seed, must be non-negative. Default: 0. + seed2 (int): Random seed2, must be non-negative. Default: 0. Inputs: - **shape** (tuple) - The shape of random tensor to be generated. Only constant value is allowed. - - **alpha** (Tensor) - The α distribution parameter. It should be greater than 0. + - **alpha** (Tensor) - The α distribution parameter. It must be greater than 0. It is also known as the shape parameter with float32 data type. - - **beta** (Tensor) - The β distribution parameter. It should be greater than 0. + - **beta** (Tensor) - The β distribution parameter. It must be greater than 0. It is also known as the scale parameter with float32 data type. Outputs: - Tensor. The shape should be the broadcasted shape of Input "shape" and shapes of alpha and beta. + Tensor. The shape must be the broadcasted shape of Input "shape" and shapes of alpha and beta. The dtype is float32. Examples: @@ -141,7 +141,7 @@ class Gamma(PrimitiveWithInfer): @prim_attr_register def __init__(self, seed=0, seed2=0): - """Init Gamma""" + """Initialize Gamma""" self.init_prim_io_names(inputs=['shape', 'alpha', 'beta'], outputs=['output']) validator.check_integer("seed", seed, 0, Rel.GE, self.name) validator.check_integer("seed2", seed2, 0, Rel.GE, self.name) @@ -172,16 +172,16 @@ class Poisson(PrimitiveWithInfer): \text{P}(i|μ) = \frac{\exp(-μ)μ^{i}}{i!}, Args: - seed (int): Random seed. Must be non-negative. Default: 0. - seed2 (int): Random seed2. Must be non-negative. Default: 0. + seed (int): Random seed, must be non-negative. Default: 0. + seed2 (int): Random seed2, must be non-negative. Default: 0. Inputs: - **shape** (tuple) - The shape of random tensor to be generated. Only constant value is allowed. - **mean** (Tensor) - μ parameter the distribution was constructed with. The parameter defines mean number - of occurrences of the event. It should be greater than 0. With float32 data type. + of occurrences of the event. It must be greater than 0. With float32 data type. Outputs: - Tensor. Its shape should be the broadcasted shape of `shape` and the shape of `mean`. + Tensor. Its shape must be the broadcasted shape of `shape` and the shape of `mean`. The dtype is int32. 
Examples: @@ -193,7 +193,7 @@ class Poisson(PrimitiveWithInfer): @prim_attr_register def __init__(self, seed=0, seed2=0): - """Init Poisson""" + """Initialize Poisson""" self.init_prim_io_names(inputs=['shape', 'mean'], outputs=['output']) validator.check_integer("seed", seed, 0, Rel.GE, self.name) validator.check_integer("seed2", seed2, 0, Rel.GE, self.name) @@ -223,11 +223,11 @@ class UniformInt(PrimitiveWithInfer): \text{P}(i|a,b) = \frac{1}{b-a+1}, Note: - The number in tensor minval should be strictly less than maxval at any position after broadcasting. + The number in tensor minval must be strictly less than maxval at any position after broadcasting. Args: - seed (int): Random seed. Must be non-negative. Default: 0. - seed2 (int): Random seed2. Must be non-negative. Default: 0. + seed (int): Random seed, must be non-negative. Default: 0. + seed2 (int): Random seed2, must be non-negative. Default: 0. Inputs: - **shape** (tuple) - The shape of random tensor to be generated. Only constant value is allowed. @@ -249,7 +249,7 @@ class UniformInt(PrimitiveWithInfer): @prim_attr_register def __init__(self, seed=0, seed2=0): - """Init UniformInt""" + """Initialize UniformInt""" self.init_prim_io_names(inputs=['shape', 'minval', 'maxval'], outputs=['output']) validator.check_integer("seed", seed, 0, Rel.GE, self.name) validator.check_integer("seed2", seed2, 0, Rel.GE, self.name) @@ -279,8 +279,8 @@ class UniformReal(PrimitiveWithInfer): Produces random floating-point values i, uniformly distributed to the interval [0, 1). Args: - seed (int): Random seed. Must be non-negative. Default: 0. - seed2 (int): Random seed2. Must be non-negative. Default: 0. + seed (int): Random seed, must be non-negative. Default: 0. + seed2 (int): Random seed2, must be non-negative. Default: 0. Inputs: - **shape** (tuple) - The shape of random tensor to be generated. Only constant value is allowed. @@ -296,7 +296,7 @@ class UniformReal(PrimitiveWithInfer): @prim_attr_register def __init__(self, seed=0, seed2=0): - """Init UniformReal""" + """Initialize UniformReal""" self.init_prim_io_names(inputs=['shape'], outputs=['output']) validator.check_integer("seed", seed, 0, Rel.GE, self.name) validator.check_integer("seed2", seed2, 0, Rel.GE, self.name) @@ -325,13 +325,13 @@ class RandomChoiceWithMask(PrimitiveWithInfer): sample, while the mask tensor denotes which elements in the index tensor are valid. Args: - count (int): Number of items expected to get and the number should be greater than 0. Default: 256. + count (int): Number of items expected to get and the number must be greater than 0. Default: 256. seed (int): Random seed. Default: 0. seed2 (int): Random seed2. Default: 0. Inputs: - **input_x** (Tensor[bool]) - The input tensor. - The input tensor rank should be greater than or equal to 1 and less than or equal to 5. + The input tensor rank must be greater than or equal to 1 and less than or equal to 5. Outputs: Two tensors, the first one is the index tensor and the other one is the mask tensor. @@ -347,7 +347,7 @@ class RandomChoiceWithMask(PrimitiveWithInfer): @prim_attr_register def __init__(self, count=256, seed=0, seed2=0): - """Init RandomChoiceWithMask""" + """Initialize RandomChoiceWithMask""" validator.check_value_type("count", count, [int], self.name) validator.check_integer("count", count, 0, Rel.GT, self.name) validator.check_value_type('seed', seed, [int], self.name) @@ -368,7 +368,7 @@ class RandomCategorical(PrimitiveWithInfer): Generates random samples from a given categorical distribution tensor. 
     Args:
-        dtype (mindspore.dtype): The type of output. Its value should be one of mindspore.int16,
+        dtype (mindspore.dtype): The type of output. Its value must be one of mindspore.int16,
             mindspore.int32 and mindspore.int64. Default: mindspore.int64.

     Inputs:
@@ -395,7 +395,7 @@ class RandomCategorical(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self, dtype=mstype.int64):
-        """Init RandomCategorical"""
+        """Initialize RandomCategorical"""
         self.dtype = dtype

         valid_values = (mstype.int32, mstype.int16, mstype.int64)
diff --git a/mindspore/ops/operations/sparse_ops.py b/mindspore/ops/operations/sparse_ops.py
index 3e3f1d899f..e9977c5e62 100644
--- a/mindspore/ops/operations/sparse_ops.py
+++ b/mindspore/ops/operations/sparse_ops.py
@@ -24,7 +24,7 @@ from ..primitive import PrimitiveWithInfer, prim_attr_register

 class SparseToDense(PrimitiveWithInfer):
     """
-    Convert a sparse representation into a dense tensor.
+    Converts a sparse representation into a dense tensor.

     Inputs:
         - **indices** (Tensor) - The indices of sparse representation.
@@ -43,7 +43,7 @@ class SparseToDense(PrimitiveWithInfer):

     @prim_attr_register
     def __init__(self):
-        """init index_select"""
+        """Initialize SparseToDense"""
         self.init_prim_io_names(inputs=['indices', 'values', 'dense_shape'], outputs=['output'])

     def __infer__(self, indices, values, dense_shape):
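A minimal usage sketch for the SparseToDense primitive described above; the `P` alias for `mindspore.ops.operations` and the concrete indices, values, and dense shape are illustrative assumptions:

>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor
>>> from mindspore.ops import operations as P
>>> indices = Tensor(np.array([[0, 1], [1, 2]]), mindspore.int32)   # positions of the non-zero entries
>>> values = Tensor(np.array([1, 2]), mindspore.float32)            # one value per row of indices
>>> dense_shape = (3, 4)
>>> out = P.SparseToDense()(indices, values, dense_shape)  # 3x4 tensor with 1 at (0, 1), 2 at (1, 2), zeros elsewhere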