diff --git a/mindspore/nn/layer/basic.py b/mindspore/nn/layer/basic.py index a4f18603fc..f22dc3c6ab 100644 --- a/mindspore/nn/layer/basic.py +++ b/mindspore/nn/layer/basic.py @@ -481,28 +481,32 @@ class OneHot(Cell): """ Returns a one-hot tensor. - The locations represented by indices in argument 'indices' take value on_value, + The locations represented by indices in argument `indices` take value on_value, while all other locations take value off_value. Note: If the input indices is rank :math:`N`, the output will have rank :math:`N+1`. The new axis is created at dimension `axis`. - If :math:`indices` is a scalar, the output shape will be a vector of length :math:`depth`. + If `indices` is a scalar, the output shape will be a vector of length `depth`. - If :math:`indices` is a vector of length :math:`features`, the output shape will be: + If `indices` is a vector of length `features`, the output shape will be: - :math:`features * depth if axis == -1` + .. code-block:: - :math:`depth * features if axis == 0` + features * depth if axis == -1 - If :math:`indices` is a matrix with shape :math:`[batch, features]`, the output shape will be: + depth * features if axis == 0 - :math:`batch * features * depth if axis == -1` + If `indices` is a matrix with shape `[batch, features]`, the output shape will be: - :math:`batch * depth * features if axis == 1` + .. code-block:: - :math:`depth * batch * features if axis == 0` + batch * features * depth if axis == -1 + + batch * depth * features if axis == 1 + + depth * batch * features if axis == 0 Args: axis (int): Features x depth if axis is -1, depth x features @@ -519,7 +523,7 @@ class OneHot(Cell): - **indices** (Tensor) - A tensor of indices of data type mindspore.int32 and arbitrary shape. Outputs: - Tensor, the one-hot tensor of data type 'dtype' with dimension at 'axis' expanded to 'depth' and filled with + Tensor, the one-hot tensor of data type `dtype` with dimension at `axis` expanded to `depth` and filled with on_value and off_value. Supported Platforms: @@ -563,7 +567,9 @@ class Pad(Cell): be extended behind of the `D` th dimension of the input tensor. The padded size of each dimension D of the output is: - :math:`paddings[D, 0]` + input_x.dim_size(D) + paddings[D, 1]`. + .. code-block:: + + paddings[D, 0] + input_x.dim_size(D) + paddings[D, 1] mode (str): Specifies padding mode. The optional values are "CONSTANT", "REFLECT", "SYMMETRIC". Default: "CONSTANT". @@ -723,9 +729,14 @@ class Unfold(Cell): Outputs: Tensor, a 4-D tensor whose data type is same as `input_x`, and the shape is [out_batch, out_depth, out_row, out_col] where `out_batch` is the same as the `in_batch`. - :math:`out_depth = ksize_row * ksize_col * in_depth`, - :math:`out_row = (in_row - (ksize_row + (ksize_row - 1) * (rate_row - 1))) // stride_row + 1`, - :math:`out_col = (in_col - (ksize_col + (ksize_col - 1) * (rate_col - 1))) // stride_col + 1`. + + .. code-block:: + + out_depth = ksize_row * ksize_col * in_depth + + out_row = (in_row - (ksize_row + (ksize_row - 1) * (rate_row - 1))) // stride_row + 1 + + out_col = (in_col - (ksize_col + (ksize_col - 1) * (rate_col - 1))) // stride_col + 1 Supported Platforms: ``Ascend`` @@ -867,13 +878,15 @@ def _get_matrix_diag_part_assist(x_shape, x_dtype): class MatrixDiag(Cell): - """ + r""" Returns a batched diagonal tensor with a given batched diagonal values. 
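[Reviewer note, not part of the patch] The Unfold output-shape formulas moved into the ``.. code-block::`` above can be sanity-checked with a small pure-Python sketch. The helper name and the example values below are illustrative only, not part of the MindSpore API; the arithmetic follows the docstring verbatim.

.. code-block:: python

    # Illustrative check of the documented Unfold output shape; parameter names follow
    # the docstring (in_row, ksize_row, rate_row, ...), the helper itself is hypothetical.
    def unfold_output_shape(in_batch, in_depth, in_row, in_col,
                            ksize_row, ksize_col,
                            stride_row, stride_col,
                            rate_row, rate_col):
        out_depth = ksize_row * ksize_col * in_depth
        eff_row = ksize_row + (ksize_row - 1) * (rate_row - 1)  # dilated patch height
        eff_col = ksize_col + (ksize_col - 1) * (rate_col - 1)  # dilated patch width
        out_row = (in_row - eff_row) // stride_row + 1
        out_col = (in_col - eff_col) // stride_col + 1
        return (in_batch, out_depth, out_row, out_col)

    # A 1 x 3 x 6 x 6 input with 2 x 2 patches, stride 2, rate 1 -> (1, 12, 3, 3).
    assert unfold_output_shape(1, 3, 6, 6, 2, 2, 2, 2, 1, 1) == (1, 12, 3, 3)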
Assume :math:`x` has :math:`k` dimensions :math:`[I, J, K, ..., N]`, then the output is a tensor of rank :math:`k+1` with dimensions :math:`[I, J, K, ..., N, N]` where: - :math:`output[i, j, k, ..., m, n] = 1{m=n} * x[i, j, k, ..., n]`. + .. code-block:: + + output[i, j, k, ..., m, n] = 1{m=n} * x[i, j, k, ..., n] Inputs: - **x** (Tensor) - The diagonal values. It can be one of the following data types: @@ -911,10 +924,12 @@ class MatrixDiagPart(Cell): r""" Returns the batched diagonal part of a batched tensor. - Assume :math:`x` has :math:`k` dimensions :math:`[I, J, K, ..., M, N]`, then the output is a tensor of rank + Assume `x` has :math:`k` dimensions :math:`[I, J, K, ..., M, N]`, then the output is a tensor of rank :math:`k-1` with dimensions :math:`[I, J, K, ..., min(M, N]` where: - :math:`output[i, j, k, ..., n] = x[i, j, k, ..., n, n]`. + .. code-block:: + + output[i, j, k, ..., n] = x[i, j, k, ..., n, n] Inputs: - **x** (Tensor) - The batched tensor. It can be one of the following data types: @@ -953,13 +968,15 @@ class MatrixSetDiag(Cell): r""" Modifies the batched diagonal part of a batched tensor. - Assume :math:`x` has :math:`k+1` dimensions :math:`[I, J, K, ..., M, N]` and :math:`diagonal` has :math:`k` + Assume `x` has :math:`k+1` dimensions :math:`[I, J, K, ..., M, N]` and `diagonal` has :math:`k` dimensions :math:`[I, J, K, ..., min(M, N)]`. Then the output is a tensor of rank :math:`k+1` with dimensions :math:`[I, J, K, ..., M, N]` where: - :math:`output[i, j, k, ..., m, n] = diagnoal[i, j, k, ..., n]` for :math:`m == n`. + .. code-block:: + + output[i, j, k, ..., m, n] = diagnoal[i, j, k, ..., n] for m == n - :math:`output[i, j, k, ..., m, n] = x[i, j, k, ..., m, n]` for :math:`m != n`. + output[i, j, k, ..., m, n] = x[i, j, k, ..., m, n] for m != n Inputs: - **x** (Tensor) - The batched tensor. Rank k+1, where k >= 1. It can be one of the following data types: diff --git a/mindspore/nn/layer/math.py b/mindspore/nn/layer/math.py index aa5fd2b877..14701d455a 100644 --- a/mindspore/nn/layer/math.py +++ b/mindspore/nn/layer/math.py @@ -105,7 +105,7 @@ class Range(Cell): r""" Creates a sequence of numbers in range [start, limit) with step size delta. - The size of output is \left \lfloor \frac{limit-start}{delta} \right \rfloor + 1 and `delta` is the gap + The size of output is :math:`\left \lfloor \frac{limit-start}{delta} \right \rfloor + 1` and `delta` is the gap between two values in the tensor. .. math:: @@ -827,7 +827,7 @@ def matmul_op_select(x1_shape, x2_shape, transpose_x1, transpose_x2): class MatMul(Cell): - """ + r""" Multiplies matrix `x1` by matrix `x2`. - If both x1 and x2 are 1-dimensional, the dot product is returned. diff --git a/mindspore/nn/layer/quant.py b/mindspore/nn/layer/quant.py index 4d1ee87020..e428c857f3 100644 --- a/mindspore/nn/layer/quant.py +++ b/mindspore/nn/layer/quant.py @@ -212,26 +212,26 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): r""" Quantization aware operation which provides the fake quantization observer function on data with min and max. - The running min/max :math:`x_\text{min}` and :math:`x_\text{max}` are computed as: + The running min/max :math:`x_{min}` and :math:`x_{max}` are computed as: .. 
math:: - \begin{array}{ll} \\ - x_\text{min} = - \begin{cases} - \min(\min(X), 0) - & \text{ if } ema = \text{False} \\ - \min((1 - c) \min(X) + \text{c } x_\text{min}, 0) - & \text{ if } \text{otherwise} - \end{cases}\\ - x_\text{max} = - \begin{cases} - \max(\max(X), 0) - & \text{ if } ema = \text{False} \\ - \max((1 - c) \max(X) + \text{c } x_\text{max}, 0) - & \text{ if } \text{otherwise} - \end{cases} - \end{array} + \begin{array}{ll} \\ + x_{min} = + \begin{cases} + \min(\min(X), 0) + & \text{ if } ema = \text{False} \\ + \min((1 - c) \min(X) + \text{c } x_{min}, 0) + & \text{ if } \text{otherwise} + \end{cases}\\ + x_{max} = + \begin{cases} + \max(\max(X), 0) + & \text{ if } ema = \text{False} \\ + \max((1 - c) \max(X) + \text{c } x_{max}, 0) + & \text{ if } \text{otherwise} + \end{cases} + \end{array} where X is the input tensor, and :math:`c` is the `ema_decay`. @@ -239,32 +239,32 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver): .. math:: - \begin{array}{ll} \\ - s = - \begin{cases} - \frac{x_\text{max} - x_\text{min}}{Q_\text{max} - Q_\text{min}} - & \text{ if } symmetric = \text{False} \\ - \frac{2\max(x_\text{max}, \left | x_\text{min} \right |) }{Q_\text{max} - Q_\text{min}} - & \text{ if } \text{otherwise} - \end{cases}\\ - zp\_min = Q_\text{min} - \frac{x_\text{min}}{scale} \\ - zp = \left \lfloor \min(Q_\text{max}, \max(Q_\text{min}, zp\_min)) + 0.5 \right \rfloor - \end{array} - - where :math:`Q_\text{max}` and :math:`Q_\text{min}` is decided by quant_dtype, for example, if quant_dtype=INT8, - then :math:`Q_\text{max}`=127 and :math:`Q_\text{min}`=-128. + \begin{array}{ll} \\ + s = + \begin{cases} + \frac{x_{max} - x_{min}}{Q_{max} - Q_{min}} + & \text{ if } symmetric = \text{False} \\ + \frac{2\max(x_{max}, \left | x_{min} \right |) }{Q_{max} - Q_{min}} + & \text{ if } \text{otherwise} + \end{cases}\\ + zp\_min = Q_{min} - \frac{x_{min}}{scale} \\ + zp = \left \lfloor \min(Q_{max}, \max(Q_{min}, zp\_min)) + 0.5 \right \rfloor + \end{array} + + where :math:`Q_{max}` and :math:`Q_{min}` is decided by quant_dtype, for example, if quant_dtype=INT8, + then :math:`Q_{max} = 127` and :math:`Q_{min} = -128`. The fake quant output is computed as: .. math:: - \begin{array}{ll} \\ - u_\text{min} = (Q_\text{min} - zp) * scale \\ - u_\text{max} = (Q_\text{max} - zp) * scale \\ - u_X = \left \lfloor \frac{\min(u_\text{max}, \max(u_\text{min}, X)) - u_\text{min}}{scale} - + 0.5 \right \rfloor \\ - output = u_X * scale + u_\text{min} - \end{array} + \begin{array}{ll} \\ + u_{min} = (Q_{min} - zp) * scale \\ + u_{max} = (Q_{max} - zp) * scale \\ + u_X = \left \lfloor \frac{\min(u_{max}, \max(u_{min}, X)) - u_{min}}{scale} + + 0.5 \right \rfloor \\ + output = u_X * scale + u_{min} + \end{array} Args: @@ -393,7 +393,7 @@ class Conv2dBnFoldQuantOneConv(Cell): 2D convolution which use the convolution layer statistics once to calculate BatchNormal operation folded construct. This part is a more detailed overview of Conv2d operation. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. + please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: in_channels (int): The number of input channel :math:`C_{in}`. @@ -594,7 +594,7 @@ class Conv2dBnFoldQuant(Cell): 2D convolution with BatchNormal operation folded construct. This part is a more detailed overview of Conv2d operation. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. 
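[Reviewer note, not part of the patch] The re-indented scale / zero-point and fake-quant formulas above can be reproduced numerically with a short NumPy sketch. The function below is a hand-rolled illustration of the asymmetric (``symmetric=False``) branch with INT8 bounds assumed; it is not the FakeQuantWithMinMaxObserver implementation.

.. code-block:: python

    import numpy as np

    # Hand-rolled sketch of the documented asymmetric fake-quant math (INT8 assumed).
    def fake_quant(x, x_min, x_max, q_min=-128, q_max=127):
        scale = (x_max - x_min) / (q_max - q_min)          # s, symmetric = False branch
        zp_min = q_min - x_min / scale
        zp = np.floor(min(q_max, max(q_min, zp_min)) + 0.5)
        u_min = (q_min - zp) * scale
        u_max = (q_max - zp) * scale
        u_x = np.floor((np.clip(x, u_min, u_max) - u_min) / scale + 0.5)
        return u_x * scale + u_min                         # values snapped onto the quant grid

    x = np.array([-1.0, -0.3, 0.0, 0.4, 1.0], dtype=np.float32)
    print(fake_quant(x, x_min=-1.0, x_max=1.0))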
+ please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: in_channels (int): The number of input channel :math:`C_{in}`. @@ -783,7 +783,7 @@ class Conv2dBnWithoutFoldQuant(Cell): 2D convolution and batchnorm without fold with fake quantized construct. This part is a more detailed overview of Conv2d operation. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. + please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: in_channels (int): The number of input channel :math:`C_{in}`. @@ -899,7 +899,7 @@ class Conv2dQuant(Cell): 2D convolution with fake quantized operation layer. This part is a more detailed overview of Conv2d operation. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. + please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: in_channels (int): The number of input channel :math:`C_{in}`. @@ -1010,7 +1010,7 @@ class DenseQuant(Cell): The fully connected layer with fake quantized operation. This part is a more detailed overview of Dense operation. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. + please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: in_channels (int): The dimension of the input space. @@ -1127,7 +1127,7 @@ class ActQuant(_QuantActivation): Add the fake quantized operation to the end of activation operation, by which the output of activation operation will be truncated. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. + please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: activation (Cell): Activation cell. @@ -1196,7 +1196,7 @@ class TensorAddQuant(Cell): Add fake quantized operation after TensorAdd operation. This part is a more detailed overview of TensorAdd operation. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. + please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. @@ -1249,7 +1249,7 @@ class MulQuant(Cell): Add fake quantized operation after `Mul` operation. This part is a more detailed overview of `Mul` operation. For more detials about Quantilization, - please refer to :class`mindspore.nn.FakeQuantWithMinMaxObserver`. + please refer to :class:`mindspore.nn.FakeQuantWithMinMaxObserver`. Args: ema_decay (float): Exponential Moving Average algorithm parameter. Default: 0.999. diff --git a/mindspore/nn/learning_rate_schedule.py b/mindspore/nn/learning_rate_schedule.py index 27dec3c7e1..360f497a67 100644 --- a/mindspore/nn/learning_rate_schedule.py +++ b/mindspore/nn/learning_rate_schedule.py @@ -79,7 +79,7 @@ class ExponentialDecayLR(LearningRateSchedule): Inputs: Tensor. The current step number. - Returns: + Outputs: Tensor. The learning rate value for the current step. Examples: @@ -137,7 +137,7 @@ class NaturalExpDecayLR(LearningRateSchedule): Inputs: Tensor. The current step number. - Returns: + Outputs: Tensor. The learning rate value for the current step. Examples: @@ -196,7 +196,7 @@ class InverseDecayLR(LearningRateSchedule): Inputs: Tensor. The current step number. - Returns: + Outputs: Tensor. The learning rate value for the current step. Examples: @@ -244,7 +244,7 @@ class CosineDecayLR(LearningRateSchedule): Inputs: Tensor. The current step number. 
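[Reviewer note, not part of the patch] The Returns -> Outputs wording change above matches how these schedules are actually used as cells: the current step goes in as a Tensor and the learning rate comes back as a Tensor. A minimal usage sketch, with the constructor arguments assumed from the MindSpore 1.x API rather than taken from this diff:

.. code-block:: python

    import mindspore
    from mindspore import Tensor, nn

    # Assumed signature: ExponentialDecayLR(learning_rate, decay_rate, decay_steps).
    exponential_decay_lr = nn.ExponentialDecayLR(learning_rate=0.1, decay_rate=0.9, decay_steps=4)
    current_step = Tensor(2, mindspore.int32)
    print(exponential_decay_lr(current_step))  # Outputs: the learning-rate Tensor for step 2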
- Returns: + Outputs: Tensor. The learning rate value for the current step. Examples: @@ -311,7 +311,7 @@ class PolynomialDecayLR(LearningRateSchedule): Inputs: Tensor. The current step number. - Returns: + Outputs: Tensor. The learning rate value for the current step. Examples: @@ -381,7 +381,7 @@ class WarmUpLR(LearningRateSchedule): Inputs: Tensor. The current step number. - Returns: + Outputs: Tensor. The learning rate value for the current step. Examples: diff --git a/mindspore/nn/optim/ftrl.py b/mindspore/nn/optim/ftrl.py index 57ea52d9f3..13f3e2292f 100644 --- a/mindspore/nn/optim/ftrl.py +++ b/mindspore/nn/optim/ftrl.py @@ -85,17 +85,17 @@ class FTRL(Optimizer): .. math:: - \begin{array}{ll} \\ - m_{t+1} = m_{t} + g^2 \\ - u_{t+1} = u_{t} + g - \frac{m_{t+1}^\text{-p} - m_{t}^\text{-p}}{\alpha } * \omega_{t} \\ - \omega_{t+1} = - \begin{cases} - \frac{(sign(u_{t+1}) * l1 - u_{t+1})}{\frac{m_{t+1}^\text{-p}}{\alpha } + 2 * l2 } - & \text{ if } |u_{t+1}| > l1 \\ - 0.0 - & \text{ otherwise } - \end{cases}\\ - \end{array} + \begin{array}{ll} \\ + m_{t+1} = m_{t} + g^2 \\ + u_{t+1} = u_{t} + g - \frac{m_{t+1}^\text{-p} - m_{t}^\text{-p}}{\alpha } * \omega_{t} \\ + \omega_{t+1} = + \begin{cases} + \frac{(sign(u_{t+1}) * l1 - u_{t+1})}{\frac{m_{t+1}^\text{-p}}{\alpha } + 2 * l2 } + & \text{ if } |u_{t+1}| > l1 \\ + 0.0 + & \text{ otherwise } + \end{cases}\\ + \end{array} :math:`m` represents `accum`, :math:`g` represents `grads`, :math:`t` represents updateing step, :math:`u` represents `linear`, :math:`p` represents `lr_power`, :math:`\alpha` represents `learning_rate`, diff --git a/mindspore/nn/optim/lars.py b/mindspore/nn/optim/lars.py index 88f978cd41..f19992fcb7 100755 --- a/mindspore/nn/optim/lars.py +++ b/mindspore/nn/optim/lars.py @@ -57,17 +57,17 @@ class LARS(Optimizer): .. math:: - \begin{array}\\ - \lambda = \frac{\theta \text{ * } || \omega || }{|| g_{t} || \text{ + } \delta \text{ * } || \omega || } \\ - \lambda = - \begin{cases} - \min(\frac{\lambda}{\alpha }, 1) - & \text{ if } clip = True \\ - \lambda - & \text{ otherwise } - \end{cases}\\ - g_{t+1} = \lambda * (g_{t} + \delta * \omega) - \end{array} + \begin{array}{ll} \\ + \lambda = \frac{\theta \text{ * } || \omega || }{|| g_{t} || \text{ + } \delta \text{ * } || \omega || } \\ + \lambda = + \begin{cases} + \min(\frac{\lambda}{\alpha }, 1) + & \text{ if } clip = True \\ + \lambda + & \text{ otherwise } + \end{cases}\\ + g_{t+1} = \lambda * (g_{t} + \delta * \omega) + \end{array} :math:`\theta` represents `coefficient`, :math:`\omega` represents `parameters`, :math:`g` represents `gradients`, :math:`t` represents updateing step, :math:`\delta` represents `weight_decay`,