From 9065ee16e466365fed39eba30c7853fca8cdf1da Mon Sep 17 00:00:00 2001 From: bingyaweng Date: Wed, 9 Sep 2020 09:37:46 +0800 Subject: [PATCH] modify API comments --- mindspore/nn/probability/README.md | 2 +- mindspore/nn/probability/bijector/__init__.py | 4 +- mindspore/nn/probability/bijector/bijector.py | 28 ++++---- mindspore/nn/probability/bijector/exp.py | 11 +-- .../probability/bijector/power_transform.py | 17 +++-- .../nn/probability/bijector/scalar_affine.py | 15 ++-- mindspore/nn/probability/bijector/softplus.py | 7 +- .../nn/probability/bnn_layers/__init__.py | 4 +- .../bnn_layers/bnn_cell_wrapper.py | 6 +- .../bnn_layers/conv_variational.py | 48 ++++++------- .../bnn_layers/dense_variational.py | 24 +++---- .../nn/probability/distribution/__init__.py | 4 +- .../nn/probability/distribution/bernoulli.py | 52 +++++++------- .../probability/distribution/categorical.py | 25 +++---- .../probability/distribution/distribution.py | 72 +++++++++---------- .../probability/distribution/exponential.py | 54 +++++++------- .../nn/probability/distribution/geometric.py | 45 ++++++------ .../nn/probability/distribution/normal.py | 66 ++++++++--------- .../distribution/transformed_distribution.py | 6 +- .../nn/probability/distribution/uniform.py | 61 ++++++++-------- mindspore/nn/probability/dpn/__init__.py | 2 - mindspore/nn/probability/dpn/vae/cvae.py | 21 +++--- mindspore/nn/probability/dpn/vae/vae.py | 15 ++-- mindspore/nn/probability/infer/__init__.py | 2 +- .../nn/probability/infer/variational/elbo.py | 4 +- .../nn/probability/infer/variational/svi.py | 4 +- .../toolbox/uncertainty_evaluation.py | 10 +-- .../nn/probability/transforms/__init__.py | 4 +- .../probability/transforms/transform_bnn.py | 8 +-- 29 files changed, 314 insertions(+), 307 deletions(-) diff --git a/mindspore/nn/probability/README.md b/mindspore/nn/probability/README.md index d3d3fd38f1..3ebeed5986 100644 --- a/mindspore/nn/probability/README.md +++ b/mindspore/nn/probability/README.md @@ -124,7 +124,7 @@ if __name__ == "__main__": epoch = 100 for i in range(epoch): - train_loss, train_acc = train_model(train_bnn_network, test_set) + train_loss, train_acc = train_model(train_bnn_network, network, train_set) valid_acc = validate_model(network, test_set) diff --git a/mindspore/nn/probability/bijector/__init__.py b/mindspore/nn/probability/bijector/__init__.py index 8caf4e428f..d6eb8c175e 100644 --- a/mindspore/nn/probability/bijector/__init__.py +++ b/mindspore/nn/probability/bijector/__init__.py @@ -13,9 +13,7 @@ # limitations under the License. # ============================================================================ """ -Bijector. - -The high-level components(Bijectors) used to construct the probabilistic network. +Bijectors are the high-level components used to construct the probabilistic network. """ from .bijector import Bijector diff --git a/mindspore/nn/probability/bijector/bijector.py b/mindspore/nn/probability/bijector/bijector.py index 4760efd4b6..11241ed833 100644 --- a/mindspore/nn/probability/bijector/bijector.py +++ b/mindspore/nn/probability/bijector/bijector.py @@ -25,11 +25,11 @@ class Bijector(Cell): Bijecotr class. Args: - is_constant_jacobian (bool): if the bijector has constant derivative. Default: False. - is_injective (bool): if the bijector is an one-to-one mapping. Default: True. - name (str): name of the bijector. Default: None. - dtype (mindspore.dtype): type of the distribution the bijector can operate on. Default: None. - param (dict): parameters used to initialize the bijector. 
Default: None. + is_constant_jacobian (bool): Whether the Bijector has constant derivative. Default: False. + is_injective (bool): Whether the Bijector is a one-to-one mapping. Default: True. + name (str): The name of the Bijector. Default: None. + dtype (mindspore.dtype): The type of the distribution the Bijector can operate on. Default: None. + param (dict): The parameters used to initialize the Bijector. Default: None. """ def __init__(self, is_constant_jacobian=False, @@ -82,7 +82,7 @@ class Bijector(Cell): def _check_value(self, value, name): """ - Check availability fo value as a Tensor. + Check availability of `value` as a Tensor. """ if self.context_mode == 0: self.checktensor(value, name) @@ -119,11 +119,11 @@ class Bijector(Cell): This __call__ may go into two directions: If args[0] is a distribution instance, the call will generate a new distribution derived from the input distribution. - Otherwise, input[0] should be the name of a bijector function, e.g. "forward", then this call will - go in the construct and invoke the correstpoding bijector function. + Otherwise, input[0] should be the name of a Bijector function, e.g. "forward", then this call will + go in the construct and invoke the correstpoding Bijector function. Args: - *args: args[0] shall be either a distribution or the name of a bijector function. + *args: args[0] shall be either a distribution or the name of a Bijector function. """ if isinstance(args[0], Distribution): return TransformedDistribution(self, args[0], self.distribution.dtype) @@ -131,16 +131,16 @@ class Bijector(Cell): def construct(self, name, *args, **kwargs): """ - Override construct in Cell. + Override `construct` in Cell. Note: Names of supported functions include: - 'forward', 'inverse', 'forward_log_jacobian', 'inverse_log_jacobian'. + 'forward', 'inverse', 'forward_log_jacobian', and 'inverse_log_jacobian'. Args: - name (str): name of the function. - *args (list): list of positional arguments needed for the function. - **kwargs (dictionary): dictionary of keyword arguments needed for the function. + name (str): The name of the function. + *args (list): A list of positional arguments that the function needs. + **kwargs (dictionary): A dictionary of keyword arguments that the function needs. """ if name == 'forward': return self.forward(*args, **kwargs) diff --git a/mindspore/nn/probability/bijector/exp.py b/mindspore/nn/probability/bijector/exp.py index 535b1b5fa9..881ee4c06b 100644 --- a/mindspore/nn/probability/bijector/exp.py +++ b/mindspore/nn/probability/bijector/exp.py @@ -18,17 +18,20 @@ from .power_transform import PowerTransform class Exp(PowerTransform): r""" Exponential Bijector. - This Bijector performs the operation: Y = exp(x). + This Bijector performs the operation: + + .. math:: + Y = exp(x). Args: - name (str): name of the bijector. Default: 'Exp'. + name (str): The name of the Bijector. Default: 'Exp'. 
Examples: - >>> # To initialize a Exp bijector + >>> # To initialize an Exp bijector >>> import mindspore.nn.probability.bijector as msb >>> n = msb.Exp() >>> - >>> # To use Exp distribution in a network + >>> # To use Exp bijector in a network >>> class net(Cell): >>> def __init__(self): >>> super(net, self).__init__(): diff --git a/mindspore/nn/probability/bijector/power_transform.py b/mindspore/nn/probability/bijector/power_transform.py index 696749692d..53791c15d8 100644 --- a/mindspore/nn/probability/bijector/power_transform.py +++ b/mindspore/nn/probability/bijector/power_transform.py @@ -23,20 +23,25 @@ from .bijector import Bijector class PowerTransform(Bijector): r""" Power Bijector. - This Bijector performs the operation: Y = g(X) = (1 + X * c)^(1 / c), X >= -1 / c, where c >= 0 is the power. + This Bijector performs the operation: + + .. math:: + Y = g(X) = (1 + X * c)^{1 / c}, X >= -1 / c + where c >= 0 is the power. The power transform maps inputs from `[-1/c, inf]` to `[0, inf]`. - This bijector is equivalent to the `Exp` bijector when `c=0` + This Bijector is equivalent to the `Exp` bijector when `c=0` Raises: ValueError: If the power is less than 0 or is not known statically. Args: - power (int or float): scale factor. Default: 0. - name (str): name of the bijector. Default: 'PowerTransform'. - param (dict): parameters used to initialize the bijector. This is only used when other bijectors that inherits - from powertransform passing in parameters. In this case the derived bijector may overwrite the param args. + power (int or float): The scale factor. Default: 0. + name (str): The name of the bijector. Default: 'PowerTransform'. + param (dict): The parameters used to initialize the bijector. These parameters are only used when other + Bijectors inherit from powertransform to pass in parameters. In this case the derived Bijector may overwrite + the argument `param`. Default: None. Examples: diff --git a/mindspore/nn/probability/bijector/scalar_affine.py b/mindspore/nn/probability/bijector/scalar_affine.py index a142c5f4f2..47b2e5fc7c 100644 --- a/mindspore/nn/probability/bijector/scalar_affine.py +++ b/mindspore/nn/probability/bijector/scalar_affine.py @@ -23,13 +23,16 @@ from .bijector import Bijector class ScalarAffine(Bijector): """ Scalar Affine Bijector. - This Bijector performs the operation: Y = a * X + b, where a is the scale - factor and b is the shift factor. + This Bijector performs the operation: + + .. math:: + Y = a * X + b + where a is the scale factor and b is the shift factor. Args: - scale (float): scale factor. Default: 1.0. - shift (float): shift factor. Default: 0.0. - name (str): name of the bijector. Default: 'ScalarAffine'. + scale (float): The scale factor. Default: 1.0. + shift (float): The shift factor. Default: 0.0. + name (str): The name of the bijector. Default: 'ScalarAffine'. Examples: >>> # To initialize a ScalarAffine bijector of scale 1 and shift 2 @@ -55,7 +58,7 @@ class ScalarAffine(Bijector): shift=0.0, name='ScalarAffine'): """ - Constructor of scalar affine bijector. + Constructor of scalar affine Bijector. 
""" param = dict(locals()) validator.check_value_type('scale', scale, [int, float], type(self).__name__) diff --git a/mindspore/nn/probability/bijector/softplus.py b/mindspore/nn/probability/bijector/softplus.py index d2e6102e3b..b06daf2f70 100644 --- a/mindspore/nn/probability/bijector/softplus.py +++ b/mindspore/nn/probability/bijector/softplus.py @@ -26,14 +26,15 @@ from .bijector import Bijector class Softplus(Bijector): r""" Softplus Bijector. - This Bijector performs the operation, where k is the sharpness factor. + This Bijector performs the operation: .. math:: Y = \frac{\log(1 + e ^ {kX})}{k} + where k is the sharpness factor. Args: - sharpness (float): scale factor. Default: 1.0. - name (str): name of the bijector. Default: 'Softplus'. + sharpness (float): The scale factor. Default: 1.0. + name (str): The name of the Bijector. Default: 'Softplus'. Examples: >>> # To initialize a Softplus bijector of sharpness 2 diff --git a/mindspore/nn/probability/bnn_layers/__init__.py b/mindspore/nn/probability/bnn_layers/__init__.py index f57c5593d1..abc719eb5a 100644 --- a/mindspore/nn/probability/bnn_layers/__init__.py +++ b/mindspore/nn/probability/bnn_layers/__init__.py @@ -13,9 +13,7 @@ # limitations under the License. # ============================================================================ """ -Bayesian Layer. - -The high-level components(Cells) used to construct the bayesian neural network. +`bnn_layers` are the high-level components used to construct the bayesian neural network. """ from . import conv_variational, dense_variational, layer_distribution, bnn_cell_wrapper diff --git a/mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py b/mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py index 0ddf753571..6912beafdc 100644 --- a/mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +++ b/mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py @@ -39,13 +39,13 @@ class ClassWrap: @ClassWrap class WithBNNLossCell: r""" - Generate WithLossCell suitable for BNN. + Generate a suitable WithLossCell for BNN to wrap the bayesian network with loss function. Args: backbone (Cell): The target network. loss_fn (Cell): The loss function used to compute loss. - dnn_factor(int, float): The coefficient of backbone's loss, which is computed by loss functin. Default: 1. - bnn_factor(int, float): The coefficient of kl loss, which is kl divergence of Bayesian layer. Default: 1. + dnn_factor(int, float): The coefficient of backbone's loss, which is computed by the loss function. Default: 1. + bnn_factor(int, float): The coefficient of KL loss, which is the KL divergence of Bayesian layer. Default: 1. Inputs: - **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`. diff --git a/mindspore/nn/probability/bnn_layers/conv_variational.py b/mindspore/nn/probability/bnn_layers/conv_variational.py index 727adab866..3f5ec10ec5 100644 --- a/mindspore/nn/probability/bnn_layers/conv_variational.py +++ b/mindspore/nn/probability/bnn_layers/conv_variational.py @@ -178,34 +178,34 @@ class ConvReparam(_ConvVariational): r""" Convolutional variational layers with Reparameterization. - See more details in paper `Auto-Encoding Variational Bayes `_. + For more details, refer to the paper `Auto-Encoding Variational Bayes `_. Args: in_channels (int): The number of input channel :math:`C_{in}`. out_channels (int): The number of output channel :math:`C_{out}`. - kernel_size (Union[int, tuple[int]]): The data type is int or - tuple with 2 integers. Specifies the height and width of the 2D - convolution window. 
Single int means the value if for both - height and width of the kernel. A tuple of 2 ints means the - first value is for the height and the other is for the width of - the kernel. + kernel_size (Union[int, tuple[int]]): The data type is an integer or + a tuple of 2 integers. The kernel size specifies the height and + width of the 2D convolution window. a single integer stands for the + value is for both height and width of the kernel. With the `kernel_size` + being a tuple of 2 integers, the first value is for the height and the other + is the width of the kernel. stride(Union[int, tuple[int]]): The distance of kernel moving, - an int number that represents the height and width of movement - are both strides, or a tuple of two int numbers that represent + an integer number represents that the height and width of movement + are both strides, or a tuple of two integers numbers represents that height and width of movement respectively. Default: 1. - pad_mode (str): Specifies padding mode. The optional values are - "same", "valid", "pad". Default: "same". + pad_mode (str): Specifies the padding mode. The optional values are + "same", "valid", and "pad". Default: "same". - same: Adopts the way of completion. Output height and width will be the same as the input. - Total number of padding will be calculated for horizontal and - vertical direction and evenly distributed to top and bottom, + The total number of padding will be calculated for in horizontal and + vertical directions and evenly distributed to top and bottom, left and right if possible. Otherwise, the last extra padding will be done from the bottom and the right side. If this mode is set, `padding` must be 0. - - valid: Adopts the way of discarding. The possibly largest - height and width of output will be return without padding. + - valid: Adopts the way of discarding. The possible largest + height and width of the output will be returned without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0. @@ -215,9 +215,9 @@ class ConvReparam(_ConvVariational): padding (Union[int, tuple[int]]): Implicit paddings on both sides of the input. Default: 0. - dilation (Union[int, tuple[int]]): The data type is int or tuple - with 2 integers. Specifies the dilation rate to use for dilated - convolution. If set to be :math:`k > 1`, + dilation (Union[int, tuple[int]]): The data type is an integer or a tuple + of 2 integers. This parameter specifies the dilation rate of the + dilated convolution. If set to be :math:`k > 1`, there will be :math:`k - 1` pixels skipped for each sampling location. Its value should be greater or equal to 1 and bounded by the height and width of the input. Default: 1. @@ -226,28 +226,28 @@ class ConvReparam(_ConvVariational): Default: 1. has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - weight_prior_fn: prior distribution for weight. + weight_prior_fn: The prior distribution for weight. It should return a mindspore distribution instance. Default: NormalPrior. (which creates an instance of standard normal distribution). The current version only supports normal distribution. - weight_posterior_fn: posterior distribution for sampling weight. + weight_posterior_fn: The posterior distribution for sampling weight. It should be a function handle which returns a mindspore distribution instance. Default: lambda name, shape: NormalPosterior(name=name, shape=shape). The current version only supports normal distribution. 
- bias_prior_fn: prior distribution for bias vector. It should return + bias_prior_fn: The prior distribution for bias vector. It should return a mindspore distribution. Default: NormalPrior(which creates an instance of standard normal distribution). The current version only supports normal distribution. - bias_posterior_fn: posterior distribution for sampling bias vector. + bias_posterior_fn: The posterior distribution for sampling bias vector. It should be a function handle which returns a mindspore distribution instance. Default: lambda name, shape: NormalPosterior(name=name, shape=shape). The current version only supports normal distribution. Inputs: - - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. + - **input** (Tensor) - The shape of the tensor is :math:`(N, C_{in}, H_{in}, W_{in})`. Outputs: - Tensor of shape :math:`(N, C_{out}, H_{out}, W_{out})`. + Tensor, with the shape being :math:`(N, C_{out}, H_{out}, W_{out})`. Examples: >>> net = ConvReparam(120, 240, 4, has_bias=False) diff --git a/mindspore/nn/probability/bnn_layers/dense_variational.py b/mindspore/nn/probability/bnn_layers/dense_variational.py index b102392e01..af2fba7b97 100644 --- a/mindspore/nn/probability/bnn_layers/dense_variational.py +++ b/mindspore/nn/probability/bnn_layers/dense_variational.py @@ -143,12 +143,12 @@ class DenseReparam(_DenseVariational): r""" Dense variational layers with Reparameterization. - See more details in paper `Auto-Encoding Variational Bayes `_. + For more details, refer to the paper `Auto-Encoding Variational Bayes `_. - Applies dense-connected layer for the input. This layer implements the operation as: + Applies dense-connected layer to the input. This layer implements the operation as: .. math:: - \text{outputs} = \text{activation}(\text{inputs} * \text{kernel} + \text{bias}), + \text{outputs} = \text{activation}(\text{inputs} * \text{weight} + \text{bias}), where :math:`\text{activation}` is the activation function passed as the activation argument (if passed in), :math:`\text{activation}` is a weight matrix with the same @@ -162,31 +162,31 @@ class DenseReparam(_DenseVariational): in_channels (int): The number of input channel. out_channels (int): The number of output channel . has_bias (bool): Specifies whether the layer uses a bias vector. Default: False. - activation (str, Cell): Regularizer function applied to the output of the layer. The type of activation can - be str (eg. 'relu') or Cell (eg. nn.ReLU()). Note that if the type of activation is Cell, it must have been - instantiated. Default: None. - weight_prior_fn: prior distribution for weight. + activation (str, Cell): A regularization function applied to the output of the layer. The type of `activation` + can be a string (eg. 'relu') or a Cell (eg. nn.ReLU()). Note that if the type of activation is Cell, it must + be instantiated beforehand. Default: None. + weight_prior_fn: The prior distribution for weight. It should return a mindspore distribution instance. Default: NormalPrior. (which creates an instance of standard normal distribution). The current version only supports normal distribution. - weight_posterior_fn: posterior distribution for sampling weight. + weight_posterior_fn: The posterior distribution for sampling weight. It should be a function handle which returns a mindspore distribution instance. Default: lambda name, shape: NormalPosterior(name=name, shape=shape). The current version only supports normal distribution. - bias_prior_fn: prior distribution for bias vector. 
It should return
+        bias_prior_fn: The prior distribution for bias vector. It should return
            a mindspore distribution. Default: NormalPrior(which creates an
            instance of standard normal distribution). The current version
            only supports normal distribution.
-        bias_posterior_fn: posterior distribution for sampling bias vector.
+        bias_posterior_fn: The posterior distribution for sampling bias vector.
            It should be a function handle which returns a mindspore
            distribution instance. Default: lambda name, shape:
            NormalPosterior(name=name, shape=shape). The current version
            only supports normal distribution.
 
     Inputs:
-        - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.
+        - **input** (Tensor) - The shape of the tensor is :math:`(N, in\_channels)`.
 
     Outputs:
-        Tensor of shape :math:`(N, out\_channels)`.
+        Tensor, the shape of the tensor is :math:`(N, out\_channels)`.
 
     Examples:
         >>> net = DenseReparam(3, 4)
diff --git a/mindspore/nn/probability/distribution/__init__.py b/mindspore/nn/probability/distribution/__init__.py
index bf30bae154..2f83d36ab0 100644
--- a/mindspore/nn/probability/distribution/__init__.py
+++ b/mindspore/nn/probability/distribution/__init__.py
@@ -13,9 +13,7 @@
 # limitations under the License.
 # ============================================================================
 """
-Distribution.
-
-The high-level components(Distributions) used to construct the probabilistic network.
+Distributions are the high-level components used to construct the probabilistic network.
 """
 
 from .distribution import Distribution
diff --git a/mindspore/nn/probability/distribution/bernoulli.py b/mindspore/nn/probability/distribution/bernoulli.py
index 46423b2e6b..8c0d5cd5a3 100644
--- a/mindspore/nn/probability/distribution/bernoulli.py
+++ b/mindspore/nn/probability/distribution/bernoulli.py
@@ -26,16 +26,16 @@ class Bernoulli(Distribution):
     Bernoulli Distribution.
 
     Args:
-        probs (float, list, numpy.ndarray, Tensor, Parameter): probability of 1 as outcome.
-        seed (int): seed to use in sampling. Global seed is used if it is None. Default: None.
-        dtype (mindspore.dtype): type of the distribution. Default: mstype.int32.
-        name (str): name of the distribution. Default: Bernoulli.
+        probs (float, list, numpy.ndarray, Tensor, Parameter): The probability that the outcome is 1.
+        seed (int): The seed used in sampling. The global seed is used if it is None. Default: None.
+        dtype (mindspore.dtype): The type of the distribution. Default: mstype.int32.
+        name (str): The name of the distribution. Default: Bernoulli.
 
     Note:
-        probs should be proper probabilities (0 < p < 1).
-        dist_spec_args is probs.
+        `probs` should be a proper probability (0 < p < 1).
+        dist_spec_args is `probs`.
 
-    Examples:
+    Examples:
        >>> # To initialize a Bernoulli distribution of prob 0.5
       >>> import mindspore.nn.probability.distribution as msd
       >>> b = msd.Bernoulli(0.5, dtype=mstype.int32)
@@ -153,13 +153,13 @@ class Bernoulli(Distribution):
     @property
     def probs(self):
         """
-        Returns the probability for the outcome is 1.
+        Return the probability that the outcome is 1.
         """
         return self._probs
 
     def _check_param(self, probs1):
         """
-        Check availablity of distribution specific args probs1.
+        Check availability of distribution specific argument `probs1`.
""" if probs1 is not None: if self.context_mode == 0: @@ -207,25 +207,25 @@ class Bernoulli(Distribution): probs0 = 1.0 - probs1 return -(probs0 * self.log(probs0)) - (probs1 * self.log(probs1)) - def _cross_entropy(self, dist, probs1_b, probs1=None): + def _cross_entropy(self, dist, probs1_b, probs1_a=None): """ Evaluate cross_entropy between Bernoulli distributions. Args: - dist (str): type of the distributions. Should be "Bernoulli" in this case. - probs1_b (Tensor): probs1 of distribution b. - probs1_a (Tensor): probs1 of distribution a. Default: self.probs. + dist (str): The type of the distributions. Should be "Bernoulli" in this case. + probs1_b (Tensor): `probs1` of distribution b. + probs1_a (Tensor): `probs1` of distribution a. Default: self.probs. """ check_distribution_name(dist, 'Bernoulli') - return self._entropy(probs1) + self._kl_loss(dist, probs1_b, probs1) + return self._entropy(probs1_a) + self._kl_loss(dist, probs1_b, probs1_a) def _log_prob(self, value, probs1=None): r""" pmf of Bernoulli distribution. Args: - value (Tensor): a Tensor composed of only zeros and ones. - probs (Tensor): probability of outcome is 1. Default: self.probs. + value (Tensor): A Tensor composed of only zeros and ones. + probs (Tensor): The probability of outcome is 1. Default: self.probs. .. math:: pmf(k) = probs1 if k = 1; @@ -239,11 +239,11 @@ class Bernoulli(Distribution): def _cdf(self, value, probs1=None): r""" - cdf of Bernoulli distribution. + Cumulative distribution function (cdf) of Bernoulli distribution. Args: - value (Tensor): value to be evaluated. - probs (Tensor): probability of outcome is 1. Default: self.probs. + value (Tensor): The value to be evaluated. + probs (Tensor): The probability of that the outcome is 1. Default: self.probs. .. math:: cdf(k) = 0 if k < 0; @@ -264,14 +264,14 @@ class Bernoulli(Distribution): less_than_zero = self.select(comp_zero, zeros, probs0) return self.select(comp_one, less_than_zero, ones) - def _kl_loss(self, dist, probs1_b, probs1=None): + def _kl_loss(self, dist, probs1_b, probs1_a=None): r""" Evaluate bernoulli-bernoulli kl divergence, i.e. KL(a||b). Args: - dist (str): type of the distributions. Should be "Bernoulli" in this case. - probs1_b (Tensor, Number): probs1 of distribution b. - probs1_a (Tensor, Number): probs1 of distribution a. Default: self.probs. + dist (str): The type of the distributions. Should be "Bernoulli" in this case. + probs1_b (Tensor, Number): `probs1` of distribution b. + probs1_a (Tensor, Number): `probs1` of distribution a. Default: self.probs. .. math:: KL(a||b) = probs1_a * \log(\frac{probs1_a}{probs1_b}) + @@ -280,7 +280,7 @@ class Bernoulli(Distribution): check_distribution_name(dist, 'Bernoulli') probs1_b = self._check_value(probs1_b, 'probs1_b') probs1_b = self.cast(probs1_b, self.parameter_type) - probs1_a = self._check_param(probs1) + probs1_a = self._check_param(probs1_a) probs0_a = 1.0 - probs1_a probs0_b = 1.0 - probs1_b return probs1_a * self.log(probs1_a / probs1_b) + probs0_a * self.log(probs0_a / probs0_b) @@ -290,8 +290,8 @@ class Bernoulli(Distribution): Sampling. Args: - shape (tuple): shape of the sample. Default: (). - probs (Tensor, Number): probs1 of the samples. Default: self.probs. + shape (tuple): The shape of the sample. Default: (). + probs1 (Tensor, Number): `probs1` of the samples. Default: self.probs. Returns: Tensor, shape is shape + batch_shape. 
diff --git a/mindspore/nn/probability/distribution/categorical.py b/mindspore/nn/probability/distribution/categorical.py index cb7dbcd7dc..a8492ad52a 100644 --- a/mindspore/nn/probability/distribution/categorical.py +++ b/mindspore/nn/probability/distribution/categorical.py @@ -23,17 +23,17 @@ from ._utils.utils import logits_to_probs, probs_to_logits, check_type, cast_to_ class Categorical(Distribution): """ - Creates a categorical distribution parameterized by either probs or logits (but not both). + Create a categorical distribution parameterized by either probabilities or logits (but not both). Args: - probs (Tensor, list, numpy.ndarray, Parameter): event probabilities. - logits (Tensor, list, numpy.ndarray, Parameter, float): event log-odds. - seed (int): seed to use in sampling. Global seed is used if it is None. Default: None. - dtype (mstype.int32): type of the distribution. Default: mstype.int32. - name (str): name of the distribution. Default: Categorical. + probs (Tensor, list, numpy.ndarray, Parameter): Event probabilities. + logits (Tensor, list, numpy.ndarray, Parameter, float): Event log-odds. + seed (int): The global seed is used in sampling. Global seed is used if it is None. Default: None. + dtype (mstype.int32): The type of the distribution. Default: mstype.int32. + name (str): The name of the distribution. Default: Categorical. Note: - probs must be non-negative, finite and have a non-zero sum, and it will be normalized to sum to 1. + `probs` must be non-negative, finite and have a non-zero sum, and it will be normalized to sum to 1. Examples: >>> # To initialize a Categorical distribution of prob is [0.5, 0.5] @@ -111,14 +111,14 @@ class Categorical(Distribution): @property def logits(self): """ - Returns the logits. + Return the logits. """ return self._logits @property def probs(self): """ - Returns the probability. + Return the probability. """ return self._probs @@ -127,7 +127,7 @@ class Categorical(Distribution): Sampling. Args: - sample_shape (tuple): shape of the sample. Default: (). + sample_shape (tuple): The shape of the sample. Default: (). Returns: Tensor, shape is shape(probs)[:-1] + sample_shape @@ -149,7 +149,7 @@ class Categorical(Distribution): Evaluate log probability. Args: - value (Tensor): value to be evaluated. + value (Tensor): The value to be evaluated. """ value = self._check_value(value, 'value') value = self.expandim(self.cast(value, mstype.float32), -1) @@ -176,8 +176,9 @@ class Categorical(Distribution): def enumerate_support(self, expand=True): r""" Enumerate categories. + Args: - expand (Bool): whether to expand. + expand (Bool): Whether to expand. """ num_events = self._num_events values = nn.Range(0., num_events, 1)() diff --git a/mindspore/nn/probability/distribution/distribution.py b/mindspore/nn/probability/distribution/distribution.py index ccf66fa432..2388f54d7a 100644 --- a/mindspore/nn/probability/distribution/distribution.py +++ b/mindspore/nn/probability/distribution/distribution.py @@ -27,25 +27,24 @@ class Distribution(Cell): Base class for all mathematical distributions. Args: - seed (int): random seed used in sampling. Global seed is used if it is None. Default: None. - dtype (mindspore.dtype): the type of the event samples. Default: subclass dtype. - name (str): Python str name prefixed to Ops created by this class. Default: subclass name. - param (dict): parameters used to initialize the distribution. + seed (int): The global seed is used in sampling. Global seed is used if it is None. Default: None. 
+ dtype (mindspore.dtype): The type of the event samples. Default: subclass dtype. + name (str): Python string name prefixed to operations created by this class. Default: subclass name. + param (dict): The parameters used to initialize the distribution. Note: - Derived class should override operations such as ,_mean, _prob, - and _log_prob. Required arguments, such as value for _prob, - should be passed in through args or kwargs. dist_spec_args which specify + Derived class should override operations such as `_mean`, `_prob`, + and `_log_prob`. Required arguments, such as value for `_prob`, + should be passed in through `args` or `kwargs`. dist_spec_args which specify a new distribution are optional. - dist_spec_args are unique for each type of distribution. For example, mean and sd - are the dist_spec_args for a Normal distribution, while rate is the dist_spec_args + dist_spec_args are unique for each type of distribution. For example, `mean` and `sd` + are the dist_spec_args for a Normal distribution, while `rate` is the dist_spec_args for exponential distribution. For all functions, passing in dist_spec_args, is optional. - Passing in the additional dist_spec_args will make the result to be evaluated with - new distribution specified by the dist_spec_args. But it won't change the - original distribuion. + Passing in the additional dist_spec_args will evaluate the result to be evaluated with + new distribution specified by the dist_spec_args. But it will not change the original distribution. """ def __init__(self, @@ -118,7 +117,7 @@ class Distribution(Cell): def _check_value(self, value, name): """ - Check availability fo value as a Tensor. + Check availability of `value` as a Tensor. """ if self.context_mode == 0: self.checktensor(value, name) @@ -127,7 +126,7 @@ class Distribution(Cell): def _set_prob(self): """ - Set probability funtion based on the availability of _prob and _log_likehood. + Set probability funtion based on the availability of `_prob` and `_log_likehood`. """ if hasattr(self, '_prob'): self._call_prob = self._prob @@ -136,7 +135,7 @@ class Distribution(Cell): def _set_sd(self): """ - Set standard deviation based on the availability of _sd and _var. + Set standard deviation based on the availability of `_sd` and `_var`. """ if hasattr(self, '_sd'): self._call_sd = self._sd @@ -145,7 +144,7 @@ class Distribution(Cell): def _set_var(self): """ - Set variance based on the availability of _sd and _var. + Set variance based on the availability of `_sd` and `_var`. """ if hasattr(self, '_var'): self._call_var = self._var @@ -154,7 +153,7 @@ class Distribution(Cell): def _set_log_prob(self): """ - Set log probability based on the availability of _prob and _log_prob. + Set log probability based on the availability of `_prob` and `_log_prob`. """ if hasattr(self, '_log_prob'): self._call_log_prob = self._log_prob @@ -163,7 +162,8 @@ class Distribution(Cell): def _set_cdf(self): """ - Set cdf based on the availability of _cdf and _log_cdf and survival_functions. + Set cumulative distribution function (cdf) based on the availability of `_cdf` and `_log_cdf` and + `survival_functions`. """ if hasattr(self, '_cdf'): self._call_cdf = self._cdf @@ -176,8 +176,8 @@ class Distribution(Cell): def _set_survival(self): """ - Set survival function based on the availability of _survival function and _log_survival - and _call_cdf. + Set survival function based on the availability of _survival function and `_log_survival` + and `_call_cdf`. 
""" if hasattr(self, '_survival_function'): self._call_survival = self._survival_function @@ -188,7 +188,7 @@ class Distribution(Cell): def _set_log_cdf(self): """ - Set log cdf based on the availability of _log_cdf and _call_cdf. + Set log cdf based on the availability of `_log_cdf` and `_call_cdf`. """ if hasattr(self, '_log_cdf'): self._call_log_cdf = self._log_cdf @@ -197,7 +197,7 @@ class Distribution(Cell): def _set_log_survival(self): """ - Set log survival based on the availability of _log_survival and _call_survival. + Set log survival based on the availability of `_log_survival` and `_call_survival`. """ if hasattr(self, '_log_survival'): self._call_log_survival = self._log_survival @@ -206,7 +206,7 @@ class Distribution(Cell): def _set_cross_entropy(self): """ - Set log survival based on the availability of _cross_entropy. + Set log survival based on the availability of `_cross_entropy`. """ if hasattr(self, '_cross_entropy'): self._call_cross_entropy = self._cross_entropy @@ -216,7 +216,7 @@ class Distribution(Cell): Evaluate the log probability(pdf or pmf) at the given value. Note: - Args must include value. + The argument `args` must include `value`. dist_spec_args are optional. """ return self._call_log_prob(*args, **kwargs) @@ -235,7 +235,7 @@ class Distribution(Cell): Evaluate the probability (pdf or pmf) at given value. Note: - Args must include value. + The argument `args` must include `value`. dist_spec_args are optional. """ return self._call_prob(*args, **kwargs) @@ -254,7 +254,7 @@ class Distribution(Cell): Evaluate the cdf at given value. Note: - Args must include value. + The argument `args` must include `value`. dist_spec_args are optional. """ return self._call_cdf(*args, **kwargs) @@ -291,7 +291,7 @@ class Distribution(Cell): Evaluate the log cdf at given value. Note: - Args must include value. + The argument `args` must include `value`. dist_spec_args are optional. """ return self._call_log_cdf(*args, **kwargs) @@ -310,7 +310,7 @@ class Distribution(Cell): Evaluate the survival function at given value. Note: - Args must include value. + The argument `args` must include `value`. dist_spec_args are optional. """ return self._call_survival(*args, **kwargs) @@ -338,7 +338,7 @@ class Distribution(Cell): Evaluate the log survival function at given value. Note: - Args must include value. + The arguments `args` must include `value`. dist_spec_args are optional. """ return self._call_log_survival(*args, **kwargs) @@ -357,7 +357,7 @@ class Distribution(Cell): Evaluate the KL divergence, i.e. KL(a||b). Note: - Args must include type of the distribution, parameters of distribution b. + The argument `args` must include the type of the distribution, parameters of distribution b. Parameters for distribution a are optional. """ return self._kl_loss(*args, **kwargs) @@ -430,7 +430,7 @@ class Distribution(Cell): Evaluate the cross_entropy between distribution a and b. Note: - Args must include type of the distribution, parameters of distribution b. + The argument `args` must include the type of the distribution, parameters of distribution b. Parameters for distribution a are optional. """ return self._call_cross_entropy(*args, **kwargs) @@ -456,17 +456,17 @@ class Distribution(Cell): def construct(self, name, *args, **kwargs): """ - Override construct in Cell. + Override `construct` in Cell. Note: Names of supported functions include: 'prob', 'log_prob', 'cdf', 'log_cdf', 'survival_function', 'log_survival' - 'var', 'sd', 'entropy', 'kl_loss', 'cross_entropy', 'sample'. 
+ 'var', 'sd', 'entropy', 'kl_loss', 'cross_entropy', and 'sample'. Args: - name (str): name of the function. - *args (list): list of positional arguments needed for the function. - **kwargs (dictionary): dictionary of keyword arguments needed for the function. + name (str): The name of the function. + *args (list): A list of positional arguments that the function needs. + **kwargs (dictionary): A dictionary of keyword arguments that the function needs. """ if name == 'log_prob': diff --git a/mindspore/nn/probability/distribution/exponential.py b/mindspore/nn/probability/distribution/exponential.py index 005b750d32..4170cde88a 100644 --- a/mindspore/nn/probability/distribution/exponential.py +++ b/mindspore/nn/probability/distribution/exponential.py @@ -27,17 +27,17 @@ class Exponential(Distribution): Example class: Exponential Distribution. Args: - rate (float, list, numpy.ndarray, Tensor, Parameter): inverse scale. - seed (int): seed to use in sampling. Global seed is used if it is None. Default: None. - dtype (mindspore.dtype): type of the distribution. Default: mstype.float32. - name (str): name of the distribution. Default: Exponential. + rate (float, list, numpy.ndarray, Tensor, Parameter): The inverse scale. + seed (int): The seed used in sampling. Global seed is used if it is None. Default: None. + dtype (mindspore.dtype): The type of the distribution. Default: mstype.float32. + name (str): The name of the distribution. Default: Exponential. Note: - rate should be strictly greater than 0. - dist_spec_args is rate. - dtype should be float type because Exponential distributions are continuous. + `rate` should be strictly greater than 0. + dist_spec_args is `rate`. + `dtype` should be float type because Exponential distributions are continuous. - Examples: + Examples: >>> # To initialize an Exponential distribution of rate 0.5 >>> import mindspore.nn.probability.distribution as msd >>> e = msd.Exponential(0.5, dtype=mstype.float32) @@ -162,7 +162,7 @@ class Exponential(Distribution): def _check_param(self, rate): """ - Check availablity of distribution specific args rate. + Check availablity of distribution specific argument `rate`. """ if rate is not None: if self.context_mode == 0: @@ -209,9 +209,9 @@ class Exponential(Distribution): Evaluate cross_entropy between Exponential distributions. Args: - dist (str): type of the distributions. Should be "Exponential" in this case. - rate_b (Tensor): rate of distribution b. - rate_a (Tensor): rate of distribution a. Default: self.rate. + dist (str): The type of the distributions. Should be "Exponential" in this case. + rate_b (Tensor): The rate of distribution b. + rate_a (Tensor): The rate of distribution a. Default: self.rate. """ check_distribution_name(dist, 'Exponential') return self._entropy(rate) + self._kl_loss(dist, rate_b, rate) @@ -223,11 +223,11 @@ class Exponential(Distribution): Args: Args: - value (Tensor): value to be evaluated. - rate (Tensor): rate of the distribution. Default: self.rate. + value (Tensor): The value to be evaluated. + rate (Tensor): The rate of the distribution. Default: self.rate. Note: - Value should be greater or equal to zero. + `value` should be greater or equal to zero. .. math:: log_pdf(x) = \log(rate) - rate * x if x >= 0 else 0 @@ -243,14 +243,14 @@ class Exponential(Distribution): def _cdf(self, value, rate=None): r""" - cdf of Exponential distribution. + Cumulative distribution function (cdf) of Exponential distribution. Args: - value (Tensor): value to be evaluated. 
-            rate (Tensor): rate of the distribution. Default: self.rate.
+            value (Tensor): The value to be evaluated.
+            rate (Tensor): The rate of the distribution. Default: self.rate.
 
         Note:
-            Value should be greater or equal to zero.
+            `value` should be greater or equal to zero.
 
         .. math::
             cdf(x) = 1.0 - \exp(-1 * \lambda * x) if x >= 0 else 0
@@ -268,11 +268,11 @@ class Exponential(Distribution):
         log survival_function of Exponential distribution.
 
         Args:
-            value (Tensor): value to be evaluated.
-            rate (Tensor): rate of the distribution. Default: self.rate.
+            value (Tensor): The value to be evaluated.
+            rate (Tensor): The rate of the distribution. Default: self.rate.
 
         Note:
-            Value should be greater or equal to zero.
+            `value` should be greater or equal to zero.
 
         .. math::
             log_survival_function(x) = -1 * \lambda * x if x >= 0 else 0
@@ -290,9 +290,9 @@ class Exponential(Distribution):
         Evaluate exp-exp kl divergence, i.e. KL(a||b).
 
         Args:
-            dist (str): type of the distributions. Should be "Exponential" in this case.
-            rate_b (Tensor): rate of distribution b.
-            rate_a (Tensor): rate of distribution a. Default: self.rate.
+            dist (str): The type of the distributions. Should be "Exponential" in this case.
+            rate_b (Tensor): The rate of distribution b.
+            rate_a (Tensor): The rate of distribution a. Default: self.rate.
         """
         check_distribution_name(dist, 'Exponential')
         rate_b = self._check_value(rate_b, 'rate_b')
@@ -305,8 +305,8 @@ class Exponential(Distribution):
         Sampling.
 
         Args:
-            shape (tuple): shape of the sample. Default: ().
-            rate (Tensor): rate of the distribution. Default: self.rate.
+            shape (tuple): The shape of the sample. Default: ().
+            rate (Tensor): The rate of the distribution. Default: self.rate.
 
         Returns:
             Tensor, shape is shape + batch_shape.
diff --git a/mindspore/nn/probability/distribution/geometric.py b/mindspore/nn/probability/distribution/geometric.py
index e5d1c56dfb..b746db0ad0 100644
--- a/mindspore/nn/probability/distribution/geometric.py
+++ b/mindspore/nn/probability/distribution/geometric.py
@@ -26,19 +26,20 @@ from ._utils.custom_ops import exp_generic, log_generic
 class Geometric(Distribution):
     """
     Geometric Distribution.
-    It represents k+1 Bernoulli trials needed to get one success, k is the number of failures.
+    It represents that there are k failures before the first success, namely that there are in total k+1 Bernoulli trials
+    when the first success is achieved.
 
     Args:
-        probs (float, list, numpy.ndarray, Tensor, Parameter): probability of success.
-        seed (int): seed to use in sampling. Global seed is used if it is None. Default: None.
-        dtype (mindspore.dtype): type of the distribution. Default: mstype.int32.
-        name (str): name of the distribution. Default: Geometric.
+        probs (float, list, numpy.ndarray, Tensor, Parameter): The probability of success.
+        seed (int): The seed used in sampling. Global seed is used if it is None. Default: None.
+        dtype (mindspore.dtype): The type of the distribution. Default: mstype.int32.
+        name (str): The name of the distribution. Default: Geometric.
 
     Note:
-        probs should be proper probabilities (0 < p < 1).
-        dist_spec_args is probs.
+        `probs` should be a proper probability (0 < p < 1).
+        dist_spec_args is `probs`.
 
-    Examples:
+    Examples:
        >>> # To initialize a Geometric distribution of prob 0.5
       >>> import mindspore.nn.probability.distribution as msd
       >>> n = msd.Geometric(0.5, dtype=mstype.int32)
@@ -159,7 +160,7 @@ class Geometric(Distribution):
     @property
     def probs(self):
         """
-        Returns the probability of success of the Bernoulli trail.
+ Return the probability of success of the Bernoulli trail. """ return self._probs @@ -213,9 +214,9 @@ class Geometric(Distribution): Evaluate cross_entropy between Geometric distributions. Args: - dist (str): type of the distributions. Should be "Geometric" in this case. - probs1_b (Tensor): probability of success of distribution b. - probs1_a (Tensor): probability of success of distribution a. Default: self.probs. + dist (str): The type of the distributions. Should be "Geometric" in this case. + probs1_b (Tensor): The probability of success of distribution b. + probs1_a (Tensor): The probability of success of distribution a. Default: self.probs. """ check_distribution_name(dist, 'Geometric') return self._entropy(probs1) + self._kl_loss(dist, probs1_b, probs1) @@ -225,8 +226,8 @@ class Geometric(Distribution): pmf of Geometric distribution. Args: - value (Tensor): a Tensor composed of only natural numbers. - probs (Tensor): probability of success. Default: self.probs. + value (Tensor): A Tensor composed of only natural numbers. + probs (Tensor): The probability of success. Default: self.probs. .. math:: pmf(k) = probs0 ^k * probs1 if k >= 0; @@ -243,11 +244,11 @@ class Geometric(Distribution): def _cdf(self, value, probs1=None): r""" - cdf of Geometric distribution. + Cumulative distribution function (cdf) of Geometric distribution. Args: - value (Tensor): a Tensor composed of only natural numbers. - probs (Tensor): probability of success. Default: self.probs. + value (Tensor): A Tensor composed of only natural numbers. + probs (Tensor): The probability of success. Default: self.probs. .. math:: cdf(k) = 1 - probs0 ^ (k+1) if k >= 0; @@ -269,9 +270,9 @@ class Geometric(Distribution): Evaluate Geometric-Geometric kl divergence, i.e. KL(a||b). Args: - dist (str): type of the distributions. Should be "Geometric" in this case. - probs1_b (Tensor): probability of success of distribution b. - probs1_a (Tensor): probability of success of distribution a. Default: self.probs. + dist (str): The type of the distributions. Should be "Geometric" in this case. + probs1_b (Tensor): The probability of success of distribution b. + probs1_a (Tensor): The probability of success of distribution a. Default: self.probs. .. math:: KL(a||b) = \log(\frac{probs1_a}{probs1_b}) + \frac{probs0_a}{probs1_a} * \log(\frac{probs0_a}{probs0_b}) @@ -289,8 +290,8 @@ class Geometric(Distribution): Sampling. Args: - shape (tuple): shape of the sample. Default: (). - probs (Tensor): probability of success. Default: self.probs. + shape (tuple): The shape of the sample. Default: (). + probs (Tensor): The probability of success. Default: self.probs. Returns: Tensor, shape is shape + batch_shape. diff --git a/mindspore/nn/probability/distribution/normal.py b/mindspore/nn/probability/distribution/normal.py index 0c4862195c..a6fcb01d15 100644 --- a/mindspore/nn/probability/distribution/normal.py +++ b/mindspore/nn/probability/distribution/normal.py @@ -27,18 +27,18 @@ class Normal(Distribution): Normal distribution. Args: - mean (int, float, list, numpy.ndarray, Tensor, Parameter): mean of the Normal distribution. - sd (int, float, list, numpy.ndarray, Tensor, Parameter): stddev of the Normal distribution. - seed (int): seed to use in sampling. Global seed is used if it is None. Default: None. - dtype (mindspore.dtype): type of the distribution. Default: mstype.float32. - name (str): name of the distribution. Default: Normal. + mean (int, float, list, numpy.ndarray, Tensor, Parameter): The mean of the Normal distribution. 
+ sd (int, float, list, numpy.ndarray, Tensor, Parameter): The standard deviation of the Normal distribution. + seed (int): The seed used in sampling. Global seed is used if it is None. Default: None. + dtype (mindspore.dtype): The type of the distribution. Default: mstype.float32. + name (str): The name of the distribution. Default: Normal. Note: - Standard deviation should be greater than zero. - dist_spec_args are mean and sd. - dtype should be float type because Normal distributions are continuous. + `sd` should be greater than zero. + dist_spec_args are `mean` and `sd`. + `dtype` should be float type because Normal distributions are continuous. - Examples: + Examples: >>> # To initialize a Normal distribution of mean 3.0 and standard deviation 4.0 >>> import mindspore.nn.probability.distribution as msd >>> n = msd.Normal(3.0, 4.0, dtype=mstype.float32) @@ -161,7 +161,7 @@ class Normal(Distribution): def _check_param(self, mean, sd): """ - Check availablity of distribution specific args mean and sd. + Check availablity of distribution specific args `mean` and `sd`. """ if mean is not None: if self.context_mode == 0: @@ -187,21 +187,21 @@ class Normal(Distribution): def _mean(self, mean=None, sd=None): """ - Mean of the distribution. + The mean of the distribution. """ mean, sd = self._check_param(mean, sd) return mean def _mode(self, mean=None, sd=None): """ - Mode of the distribution. + The mode of the distribution. """ mean, sd = self._check_param(mean, sd) return mean def _sd(self, mean=None, sd=None): """ - Standard deviation of the distribution. + The standard deviation of the distribution. """ mean, sd = self._check_param(mean, sd) return sd @@ -221,11 +221,11 @@ class Normal(Distribution): Evaluate cross_entropy between normal distributions. Args: - dist (str): type of the distributions. Should be "Normal" in this case. - mean_b (Tensor): mean of distribution b. - sd_b (Tensor): standard deviation distribution b. - mean_a (Tensor): mean of distribution a. Default: self._mean_value. - sd_a (Tensor): standard deviation distribution a. Default: self._sd_value. + dist (str): Type of the distributions. Should be "Normal" in this case. + mean_b (Tensor): Mean of distribution b. + sd_b (Tensor): Standard deviation distribution b. + mean_a (Tensor): Mean of distribution a. Default: self._mean_value. + sd_a (Tensor): Standard deviation distribution a. Default: self._sd_value. """ check_distribution_name(dist, 'Normal') return self._entropy(mean, sd) + self._kl_loss(dist, mean_b, sd_b, mean, sd) @@ -235,9 +235,9 @@ class Normal(Distribution): Evaluate log probability. Args: - value (Tensor): value to be evaluated. - mean (Tensor): mean of the distribution. Default: self._mean_value. - sd (Tensor): standard deviation the distribution. Default: self._sd_value. + value (Tensor): The value to be evaluated. + mean (Tensor): The mean of the distribution. Default: self._mean_value. + sd (Tensor): The standard deviation the distribution. Default: self._sd_value. .. math:: L(x) = -1* \frac{(x - \mu)^2}{2. * \sigma^2} - \log(\sqrt(2* \pi * \sigma^2)) @@ -254,9 +254,9 @@ class Normal(Distribution): Evaluate cdf of given value. Args: - value (Tensor): value to be evaluated. - mean (Tensor): mean of the distribution. Default: self._mean_value. - sd (Tensor): standard deviation the distribution. Default: self._sd_value. + value (Tensor): The value to be evaluated. + mean (Tensor): The mean of the distribution. Default: self._mean_value. + sd (Tensor): The standard deviation the distribution. 
Default: self._sd_value. .. math:: cdf(x) = 0.5 * (1+ Erf((x - \mu) / ( \sigma * \sqrt(2)))) @@ -270,14 +270,14 @@ class Normal(Distribution): def _kl_loss(self, dist, mean_b, sd_b, mean=None, sd=None): r""" - Evaluate Normal-Normal kl divergence, i.e. KL(a||b). + Evaluate Normal-Normal KL divergence, i.e. KL(a||b). Args: - dist (str): type of the distributions. Should be "Normal" in this case. - mean_b (Tensor): mean of distribution b. - sd_b (Tensor): standard deviation distribution b. - mean_a (Tensor): mean of distribution a. Default: self._mean_value. - sd_a (Tensor): standard deviation distribution a. Default: self._sd_value. + dist (str): The type of the distributions. Should be "Normal" in this case. + mean_b (Tensor): The mean of distribution b. + sd_b (Tensor): The standard deviation distribution b. + mean_a (Tensor): The mean of distribution a. Default: self._mean_value. + sd_a (Tensor): The standard deviation distribution a. Default: self._sd_value. .. math:: KL(a||b) = 0.5 * (\frac{MEAN(a)}{STD(b)} - \frac{MEAN(b)}{STD(b)}) ^ 2 + @@ -298,9 +298,9 @@ class Normal(Distribution): Sampling. Args: - shape (tuple): shape of the sample. Default: (). - mean (Tensor): mean of the samples. Default: self._mean_value. - sd (Tensor): standard deviation of the samples. Default: self._sd_value. + shape (tuple): The shape of the sample. Default: (). + mean (Tensor): The mean of the samples. Default: self._mean_value. + sd (Tensor): The standard deviation of the samples. Default: self._sd_value. Returns: Tensor, shape is shape + batch_shape. diff --git a/mindspore/nn/probability/distribution/transformed_distribution.py b/mindspore/nn/probability/distribution/transformed_distribution.py index 9a9dca3671..db1c93bd4d 100644 --- a/mindspore/nn/probability/distribution/transformed_distribution.py +++ b/mindspore/nn/probability/distribution/transformed_distribution.py @@ -27,14 +27,14 @@ class TransformedDistribution(Distribution): to a new distribution through the operation defined by the bijector. Args: - bijector (Bijector): transformation to perform. + bijector (Bijector): The transformation to perform. distribution (Distribution): The original distribution. - name (str): name of the transformed distribution. Default: transformed_distribution. + name (str): The name of the transformed distribution. Default: transformed_distribution. Note: The arguments used to initialize the original distribution cannot be None. For example, mynormal = nn.Normal(dtype=dtyple.float32) cannot be used to initialized a - TransformedDistribution since mean and sd are not specified. + TransformedDistribution since `mean` and `sd` are not specified. Examples: >>> # To initialize a transformed distribution, e.g. lognormal distribution, diff --git a/mindspore/nn/probability/distribution/uniform.py b/mindspore/nn/probability/distribution/uniform.py index 9cd86cf9ae..7de0d7f33f 100644 --- a/mindspore/nn/probability/distribution/uniform.py +++ b/mindspore/nn/probability/distribution/uniform.py @@ -26,18 +26,18 @@ class Uniform(Distribution): Example class: Uniform Distribution. Args: - low (int, float, list, numpy.ndarray, Tensor, Parameter): lower bound of the distribution. - high (int, float, list, numpy.ndarray, Tensor, Parameter): upper bound of the distribution. - seed (int): seed to use in sampling. Global seed is used if it is None. Default: None. - dtype (mindspore.dtype): type of the distribution. Default: mstype.float32. - name (str): name of the distribution. Default: Uniform. 
+ low (int, float, list, numpy.ndarray, Tensor, Parameter): The lower bound of the distribution. + high (int, float, list, numpy.ndarray, Tensor, Parameter): The upper bound of the distribution. + seed (int): The seed uses in sampling. Global seed is used if it is None. Default: None. + dtype (mindspore.dtype): The type of the distribution. Default: mstype.float32. + name (str): The name of the distribution. Default: Uniform. Note: - low should be stricly less than high. - dist_spec_args are high and low. - dtype should be float type because Uniform distributions are continuous. + `low` should be stricly less than `high`. + dist_spec_args are `high` and `low`. + `dtype` should be float type because Uniform distributions are continuous. - Examples: + Examples: >>> # To initialize a Uniform distribution of mean 3.0 and standard deviation 4.0 >>> import mindspore.nn.probability.distribution as msd >>> u = msd.Uniform(0.0, 1.0, dtype=mstype.float32) @@ -164,7 +164,7 @@ class Uniform(Distribution): def _check_param(self, low, high): """ - Check availablity of distribution specific args low and high. + Check availablity of distribution specific args `low` and `high`. """ if low is not None: if self.context_mode == 0: @@ -205,6 +205,7 @@ class Uniform(Distribution): def _range(self, low=None, high=None): r""" Return the range of the distribution. + .. math:: range(U) = high -low """ @@ -240,11 +241,11 @@ class Uniform(Distribution): Evaluate cross_entropy between Uniform distributoins. Args: - dist (str): type of the distributions. Should be "Uniform" in this case. - low_b (Tensor): lower bound of distribution b. - high_b (Tensor): upper bound of distribution b. - low_a (Tensor): lower bound of distribution a. Default: self.low. - high_a (Tensor): upper bound of distribution a. Default: self.high. + dist (str): The type of the distributions. Should be "Uniform" in this case. + low_b (Tensor): The lower bound of distribution b. + high_b (Tensor): The upper bound of distribution b. + low_a (Tensor): The lower bound of distribution a. Default: self.low. + high_a (Tensor): The upper bound of distribution a. Default: self.high. """ check_distribution_name(dist, 'Uniform') return self._entropy(low, high) + self._kl_loss(dist, low_b, high_b, low, high) @@ -254,9 +255,9 @@ class Uniform(Distribution): pdf of Uniform distribution. Args: - value (Tensor): value to be evaluated. - low (Tensor): lower bound of the distribution. Default: self.low. - high (Tensor): upper bound of the distribution. Default: self.high. + value (Tensor): The value to be evaluated. + low (Tensor): The lower bound of the distribution. Default: self.low. + high (Tensor): The upper bound of the distribution. Default: self.high. .. math:: pdf(x) = 0 if x < low; @@ -277,14 +278,14 @@ class Uniform(Distribution): def _kl_loss(self, dist, low_b, high_b, low=None, high=None): """ - Evaluate uniform-uniform kl divergence, i.e. KL(a||b). + Evaluate uniform-uniform KL divergence, i.e. KL(a||b). Args: - dist (str): type of the distributions. Should be "Uniform" in this case. - low_b (Tensor): lower bound of distribution b. - high_b (Tensor): upper bound of distribution b. - low_a (Tensor): lower bound of distribution a. Default: self.low. - high_a (Tensor): upper bound of distribution a. Default: self.high. + dist (str): The type of the distributions. Should be "Uniform" in this case. + low_b (Tensor): The lower bound of distribution b. + high_b (Tensor): The upper bound of distribution b. + low_a (Tensor): The lower bound of distribution a. 
+            high_a (Tensor): The upper bound of distribution a. Default: self.high.
         """
         check_distribution_name(dist, 'Uniform')
         low_b = self._check_value(low_b, 'low_b')
@@ -301,9 +302,9 @@ class Uniform(Distribution):
         cdf of Uniform distribution.
         Args:
-            value (Tensor): value to be evaluated.
-            low (Tensor): lower bound of the distribution. Default: self.low.
-            high (Tensor): upper bound of the distribution. Default: self.high.
+            value (Tensor): The value to be evaluated.
+            low (Tensor): The lower bound of the distribution. Default: self.low.
+            high (Tensor): The upper bound of the distribution. Default: self.high.
         .. math::
             cdf(x) = 0 if x < low;
@@ -327,9 +328,9 @@ class Uniform(Distribution):
         Sampling.
         Args:
-            shape (tuple): shape of the sample. Default: ().
-            low (Tensor): lower bound of the distribution. Default: self.low.
-            high (Tensor): upper bound of the distribution. Default: self.high.
+            shape (tuple): The shape of the sample. Default: ().
+            low (Tensor): The lower bound of the distribution. Default: self.low.
+            high (Tensor): The upper bound of the distribution. Default: self.high.
         Returns:
             Tensor, shape is shape + batch_shape.
diff --git a/mindspore/nn/probability/dpn/__init__.py b/mindspore/nn/probability/dpn/__init__.py
index d5636469a9..bf8dbb7660 100644
--- a/mindspore/nn/probability/dpn/__init__.py
+++ b/mindspore/nn/probability/dpn/__init__.py
@@ -13,8 +13,6 @@
 # limitations under the License.
 # ============================================================================
 """
-Deep Probability Network(dpn).
-
 Deep probability network such as BNN and VAE network.
 """
diff --git a/mindspore/nn/probability/dpn/vae/cvae.py b/mindspore/nn/probability/dpn/vae/cvae.py
index 32a7ae332e..e2b220d6fd 100644
--- a/mindspore/nn/probability/dpn/vae/cvae.py
+++ b/mindspore/nn/probability/dpn/vae/cvae.py
@@ -25,25 +25,26 @@ class ConditionalVAE(Cell):
     Conditional Variational Auto-Encoder (CVAE).
     The difference with VAE is that CVAE uses labels information.
-    see more details in `Learning Structured Output Representation using Deep Conditional Generative Models
+    For more details, refer to `Learning Structured Output Representation using Deep Conditional Generative Models
     `_.
     Note:
-        When define the encoder and decoder, the shape of the encoder's output tensor and decoder's input tensor
+        When the encoder and decoder are defined, the shape of the encoder's output tensor and decoder's input tensor
         should be :math:`(N, hidden\_size)`. The latent_size should be less than or equal to the hidden_size.
     Args:
-        encoder(Cell): The DNN model defined as encoder.
+        encoder(Cell): The Deep Neural Network (DNN) model defined as encoder.
         decoder(Cell): The DNN model defined as decoder.
         hidden_size(int): The size of encoder's output tensor.
         latent_size(int): The size of the latent space.
         num_classes(int): The number of classes.
     Inputs:
-        - **input_x** (Tensor) - the same shape as the input of encoder, the shape is :math:`(N, C, H, W)`.
-        - **input_y** (Tensor) - the tensor of the target data, the shape is :math:`(N,)`.
+        - **input_x** (Tensor) - The input tensor has the same shape as the input of the encoder, with the shape
+          being :math:`(N, C, H, W)`.
+        - **input_y** (Tensor) - The tensor of the target data, the shape is :math:`(N,)`.
     Outputs:
         - **output** (tuple) - (recon_x(Tensor), x(Tensor), mu(Tensor), std(Tensor)).
@@ -96,15 +97,15 @@ class ConditionalVAE(Cell):
     def generate_sample(self, sample_y, generate_nums, shape):
         """
-        Randomly sample from latent space to generate sample.
+        Randomly sample from the latent space to generate samples.
         Args:
-            sample_y (Tensor): Define the label of sample. Tensor of shape (generate_nums, ) and type mindspore.int32.
+            sample_y (Tensor): Defines the labels of samples. Tensor of shape (generate_nums, ) and type mindspore.int32.
             generate_nums (int): The number of samples to generate.
-            shape(tuple): The shape of sample, it should be (generate_nums, C, H, W) or (-1, C, H, W).
+            shape(tuple): The shape of samples, which should be in the format of (generate_nums, C, H, W) or (-1, C, H, W).
         Returns:
-            Tensor, the generated sample.
+            Tensor, the generated samples.
         """
         generate_nums = check_int_positive(generate_nums)
         if not isinstance(shape, tuple) or len(shape) != 4 or (shape[0] != -1 and shape[0] != generate_nums):
@@ -118,7 +119,7 @@ class ConditionalVAE(Cell):
     def reconstruct_sample(self, x, y):
         """
-        Reconstruct sample from original data.
+        Reconstruct samples from original data.
         Args:
             x (Tensor): The input tensor to be reconstructed, the shape is (N, C, H, W).
diff --git a/mindspore/nn/probability/dpn/vae/vae.py b/mindspore/nn/probability/dpn/vae/vae.py
index 6e79641030..5f5c87cf35 100644
--- a/mindspore/nn/probability/dpn/vae/vae.py
+++ b/mindspore/nn/probability/dpn/vae/vae.py
@@ -25,21 +25,22 @@ class VAE(Cell):
     Variational Auto-Encoder (VAE).
     The VAE defines a generative model, `Z` is sampled from the prior, then used to reconstruct `X` by a decoder.
-    see more details in `Auto-Encoding Variational Bayes `_.
+    For more details, refer to `Auto-Encoding Variational Bayes `_.
     Note:
-        When define the encoder and decoder, the shape of the encoder's output tensor and decoder's input tensor
+        When the encoder and decoder are defined, the shape of the encoder's output tensor and decoder's input tensor
         should be :math:`(N, hidden\_size)`. The latent_size should be less than or equal to the hidden_size.
     Args:
-        encoder(Cell): The DNN model defined as encoder.
+        encoder(Cell): The Deep Neural Network (DNN) model defined as encoder.
         decoder(Cell): The DNN model defined as decoder.
         hidden_size(int): The size of encoder's output tensor.
         latent_size(int): The size of the latent space.
     Inputs:
-        - **input** (Tensor) - the same shape as the input of encoder, the shape is :math:`(N, C, H, W)`.
+        - **input** (Tensor) - The input tensor has the same shape as the input of the encoder, the shape
+          is :math:`(N, C, H, W)`.
     Outputs:
         - **output** (Tuple) - (recon_x(Tensor), x(Tensor), mu(Tensor), std(Tensor)).
@@ -84,14 +85,14 @@ class VAE(Cell):
     def generate_sample(self, generate_nums, shape):
         """
-        Randomly sample from latent space to generate sample.
+        Randomly sample from the latent space to generate samples.
         Args:
             generate_nums (int): The number of samples to generate.
             shape(tuple): The shape of sample, it should be (generate_nums, C, H, W) or (-1, C, H, W).
         Returns:
-            Tensor, the generated sample.
+            Tensor, the generated samples.
         """
         generate_nums = check_int_positive(generate_nums)
         if not isinstance(shape, tuple) or len(shape) != 4 or (shape[0] != -1 and shape[0] != generate_nums):
@@ -103,7 +104,7 @@ class VAE(Cell):
     def reconstruct_sample(self, x):
         """
-        Reconstruct sample from original data.
+        Reconstruct samples from original data.
         Args:
             x (Tensor): The input tensor to be reconstructed, the shape is (N, C, H, W).
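To make the VAE interface documented above concrete, a minimal usage sketch based only on the signatures described in these docstrings; `Encoder` and `Decoder` stand for hypothetical user-defined Cells that satisfy the (N, hidden_size) contract from the Note, `VAE` is assumed to be importable from `mindspore.nn.probability.dpn`, and the concrete sizes and the input batch `x` are illustrative placeholders:

    from mindspore.nn.probability.dpn import VAE

    # Hypothetical Cells: the encoder must output (N, hidden_size) and the
    # decoder must accept (N, hidden_size), as required by the Note above.
    encoder = Encoder()
    decoder = Decoder()

    # latent_size must be less than or equal to hidden_size.
    vae = VAE(encoder, decoder, hidden_size=400, latent_size=20)

    # shape follows the documented format (generate_nums, C, H, W) or (-1, C, H, W).
    generated = vae.generate_sample(64, (64, 1, 32, 32))

    # x is an illustrative Tensor of shape (N, C, H, W) to be reconstructed.
    reconstructed = vae.reconstruct_sample(x)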
diff --git a/mindspore/nn/probability/infer/__init__.py b/mindspore/nn/probability/infer/__init__.py
index 52968a8e0d..3e190bb2d5 100644
--- a/mindspore/nn/probability/infer/__init__.py
+++ b/mindspore/nn/probability/infer/__init__.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ============================================================================
 """
-Infer algorithms in Probabilistic Programming.
+Inference algorithms in Probabilistic Programming.
 """
 from .variational import *
diff --git a/mindspore/nn/probability/infer/variational/elbo.py b/mindspore/nn/probability/infer/variational/elbo.py
index 5d1a01036b..f76faccb1c 100644
--- a/mindspore/nn/probability/infer/variational/elbo.py
+++ b/mindspore/nn/probability/infer/variational/elbo.py
@@ -24,10 +24,10 @@ class ELBO(Cell):
     The Evidence Lower Bound (ELBO).
     Variational inference minimizes the Kullback-Leibler (KL) divergence from the variational distribution to
-    the posterior distribution. It maximizes the evidence lower bound (ELBO), a lower bound on the logarithm of
+    the posterior distribution. It maximizes the ELBO, a lower bound on the logarithm of
     the marginal probability of the observations log p(x). The ELBO is equal to the negative KL divergence up to
     an additive constant.
-    see more details in `Variational Inference: A Review for Statisticians `_.
+    For more details, refer to `Variational Inference: A Review for Statisticians `_.
     Args:
         latent_prior(str): The prior distribution of latent space. Default: Normal.
diff --git a/mindspore/nn/probability/infer/variational/svi.py b/mindspore/nn/probability/infer/variational/svi.py
index 2613275ffe..92ea9aed77 100644
--- a/mindspore/nn/probability/infer/variational/svi.py
+++ b/mindspore/nn/probability/infer/variational/svi.py
@@ -26,9 +26,9 @@ class SVI:
     Stochastic Variational Inference(SVI).
     Variational inference casts the inference problem as an optimization. Some distributions over the hidden
-    variables that is indexed by a set of free parameters, and then optimize the parameters to make it closest to
+    variables are indexed by a set of free parameters, which are optimized to make the distributions closest to
     the posterior of interest.
-    see more details in `Variational Inference: A Review for Statisticians `_.
+    For more details, refer to `Variational Inference: A Review for Statisticians `_.
     Args:
         net_with_loss(Cell): Cell with loss function.
diff --git a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
index 35c87d4f1b..67ef74b28a 100644
--- a/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
+++ b/mindspore/nn/probability/toolbox/uncertainty_evaluation.py
@@ -43,15 +43,15 @@ class UncertaintyEvaluation:
             - regression: A regression model.
             - classification: A classification model.
         num_classes (int): The number of labels of classification.
-            If the task type is classification, it must be set; if not classification, it need not to be set.
+            If the task type is classification, it must be set; otherwise, it is not needed.
             Default: None.
         epochs (int): Total number of iterations on the data. Default: 1.
         epi_uncer_model_path (str): The save or read path of the epistemic uncertainty model. Default: None.
        ale_uncer_model_path (str): The save or read path of the aleatoric uncertainty model. Default: None.
-        save_model (bool): Save the uncertainty model or not, if True, the epi_uncer_model_path
-            and ale_uncer_model_path should not be None. If False, give the path of
-            the uncertainty model, it will load the model to evaluate, if not given
-            the path, it will not save or load the uncertainty model. Default: False.
+        save_model (bool): Whether to save the uncertainty model or not. If True, the epi_uncer_model_path
+            and ale_uncer_model_path should not be None. If False, the model to evaluate will be loaded from
+            the path of the uncertainty model; if the path is not given, it will not save or load the
+            uncertainty model. Default: False.
     Examples:
         >>> network = LeNet()
diff --git a/mindspore/nn/probability/transforms/__init__.py b/mindspore/nn/probability/transforms/__init__.py
index 7b8524359c..0a3a7e468d 100644
--- a/mindspore/nn/probability/transforms/__init__.py
+++ b/mindspore/nn/probability/transforms/__init__.py
@@ -13,9 +13,7 @@
 # limitations under the License.
 # ============================================================================
 """
-Transforms.
-
-The high-level components used to transform model between DNN and BNN.
+The high-level components used to transform a model between Deep Neural Network (DNN) and Bayesian Neural Network (BNN).
 """
 from . import transform_bnn
 from .transform_bnn import TransformToBNN
diff --git a/mindspore/nn/probability/transforms/transform_bnn.py b/mindspore/nn/probability/transforms/transform_bnn.py
index 005afed9ca..1dc89b59e7 100644
--- a/mindspore/nn/probability/transforms/transform_bnn.py
+++ b/mindspore/nn/probability/transforms/transform_bnn.py
@@ -32,7 +32,7 @@ class TransformToBNN:
     Args:
         trainable_dnn (Cell): A trainable DNN model (backbone) wrapped by TrainOneStepCell.
         dnn_factor ((int, float): The coefficient of backbone's loss, which is computed by loss function. Default: 1.
-        bnn_factor (int, float): The coefficient of kl loss, which is kl divergence of Bayesian layer. Default: 1.
+        bnn_factor (int, float): The coefficient of KL loss, which is the KL divergence of the Bayesian layer. Default: 1.
     Examples:
         >>> class Net(nn.Cell):
@@ -139,15 +139,15 @@ class TransformToBNN:
         Args:
             dnn_layer_type (Cell): The type of DNN layer to be transformed to BNN layer. The optional values are
-                nn.Dense, nn.Conv2d.
+                nn.Dense and nn.Conv2d.
             bnn_layer_type (Cell): The type of BNN layer to be transformed to. The optional values are
-                DenseReparam, ConvReparam.
+                DenseReparam and ConvReparam.
             get_args: The arguments gotten from the DNN layer. Default: None.
             add_args (dict): The new arguments added to BNN layer. Note that the arguments in `add_args` should not
                 duplicate arguments in `get_args`. Default: None.
         Returns:
-            Cell, a trainable model wrapped by TrainOneStepCell, whose sprcific type of layer is transformed to the
+            Cell, a trainable model wrapped by TrainOneStepCell, whose specific type of layer is transformed to the
             corresponding bayesian layer.
         Examples: