Define quantization aware training frontend operators.

pull/212/head
chenzomi 5 years ago
parent c75f75a3e1
commit d64f662c76

@@ -17,7 +17,7 @@ Layer.
The high-level components(Cells) used to construct the neural network.
"""
-from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU
+from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU, HSigmoid, HSwish
from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm
from .container import SequentialCell, CellList
from .conv import Conv2d, Conv2dTranspose
@@ -26,8 +26,9 @@ from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot, ImageGradi
from .embedding import Embedding
from .pooling import AvgPool2d, MaxPool2d

-__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'PReLU', 'get_activation', 'LeakyReLU',
-           'BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'ELU',
+__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid',
+           'PReLU', 'get_activation', 'LeakyReLU', 'HSigmoid', 'HSwish', 'ELU',
+           'BatchNorm1d', 'BatchNorm2d', 'LayerNorm',
           'SequentialCell', 'CellList',
           'Conv2d', 'Conv2dTranspose',
           'LSTM',

File diff suppressed because it is too large

@@ -234,7 +234,7 @@ class Tanh(Cell):
class GELU(Cell):
-    """
+    r"""
    Gaussian error linear unit activation function.

    Applies GELU function to each element of the input. The input is a Tensor with any valid shape.
@@ -332,15 +332,74 @@ class PReLU(Cell):
        return v
class HSwish(Cell):
    r"""
    Hard swish activation function.

    Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.

    Hard swish is defined as:

    .. math::
        \text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSwish.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """
    def __init__(self):
        super(HSwish, self).__init__()
        self.hswish = P.HSwish()

    def construct(self, x):
        return self.hswish(x)


class HSigmoid(Cell):
    r"""
    Hard sigmoid activation function.

    Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.

    Hard sigmoid is defined as:

    .. math::
        \text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{2 * x_{i} + 5}{10})),

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSigmoid.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """
    def __init__(self):
        super(HSigmoid, self).__init__()
        self.hsigmoid = P.HSigmoid()

    def construct(self, x):
        return self.hsigmoid(x)
_activation = {
    'softmax': Softmax,
    'logsoftmax': LogSoftmax,
    'relu': ReLU,
+    'relu6': ReLU6,
    'tanh': Tanh,
    'gelu': GELU,
    'sigmoid': Sigmoid,
    'prelu': PReLU,
-    'leakyrelu': LeakyReLU
+    'leakyrelu': LeakyReLU,
+    'hswish': HSwish,
+    'hsigmoid': HSigmoid,
}
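With the cells and the lookup keys above in place, HSwish and HSigmoid can be used like any other activation. A minimal usage sketch, assuming a MindSpore build that includes this commit and a backend that provides kernels for these ops (the input values are illustrative):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Build the cells directly ...
hswish = nn.HSwish()
hsigmoid = nn.HSigmoid()

# ... or resolve them through the lowercase keys added to _activation above.
assert isinstance(nn.get_activation('hswish'), nn.HSwish)
assert isinstance(nn.get_activation('hsigmoid'), nn.HSigmoid)

x = Tensor(np.array([-4.0, -1.0, 0.0, 1.0, 4.0], np.float32))
print(hswish(x))    # elementwise x * relu6(x + 3) / 6
print(hsigmoid(x))  # elementwise clamp(0.2 * x + 0.5, 0, 1)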

@@ -172,6 +172,28 @@ def get_bprop_relu6(self):
    return bprop


@bprop_getters.register(P.HSwish)
def get_bprop_hswish(self):
    """Grad definition for `HSwish` operation."""
    input_grad = G.HSwishGrad()

    def bprop(x, out, dout):
        dx = input_grad(dout, x)
        return (dx,)

    return bprop


@bprop_getters.register(P.HSigmoid)
def get_bprop_hsigmoid(self):
    """Grad definition for `HSigmoid` operation."""
    input_grad = G.HSigmoidGrad()

    def bprop(x, out, dout):
        dx = input_grad(dout, x)
        return (dx,)

    return bprop


@bprop_getters.register(P.Elu)
def get_bprop_elu(self):
    """Grad definition for `Elu` operation."""

@@ -0,0 +1,82 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Generate bprop for aware quantization ops"""
from .. import operations as P
from .grad_base import bprop_getters
from ..composite.multitype_ops.zeros_like_impl import zeros_like

@bprop_getters.register(P.FakeQuantWithMinMax)
def get_bprop_fakequant_with_minmax(self):
    """Generate bprop for FakeQuantWithMinMax"""
    op = P.FakeQuantWithMinMaxGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)

    def bprop(x, x_min, x_max, out, dout):
        dx = op(dout, x, x_min, x_max)
        return dx, zeros_like(x_min), zeros_like(x_max)

    return bprop


@bprop_getters.register(P.FakeQuantWithMinMaxPerChannel)
def get_bprop_fakequant_with_minmax_perchannel(self):
    """Generate bprop for FakeQuantWithMinMaxPerChannel"""
    op = P.FakeQuantWithMinMaxPerChannelGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)

    def bprop(x, x_min, x_max, out, dout):
        dx = op(dout, x, x_min, x_max)
        return dx, zeros_like(x_min), zeros_like(x_max)

    return bprop


@bprop_getters.register(P.BatchNormFold)
def get_bprop_batchnorm_fold(self):
    """Generate bprop for BatchNormFold"""
    op = P.BatchNormFoldGrad(self.epsilon, self.is_training, self.freeze_bn)

    def bprop(x, mean, variance, global_step, out, dout):
        dx = op(dout[0], dout[1], x, out[0], out[1], global_step)
        return dx, zeros_like(mean), zeros_like(variance), zeros_like(global_step)

    return bprop


@bprop_getters.register(P.CorrectionMul)
def get_bprop_correction_mul(self):
    """Generate bprop for CorrectionMul"""
    grad = P.CorrectionMulGrad()

    def bprop(x, batch_std, running_std, out, dout):
        dx, d_batch_std = grad(dout, x, batch_std, running_std)
        return dx, d_batch_std, zeros_like(running_std)

    return bprop


@bprop_getters.register(P.BatchNormFold2)
def get_bprop_batchnorm_fold2(self):
    """Generate bprop for BatchNormFold2"""
    op_f = P.BatchNormFold2Grad(freeze_bn=self.freeze_bn)

    def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout):
        d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(dout, x, gamma, batch_std, batch_mean, running_std,
                                                               running_mean, global_step)
        return d_x, d_beta, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std), zeros_like(running_mean), \
               zeros_like(global_step)

    return bprop
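Each fake-quant bprop returns the kernel's dx for the data input and zeros for the min/max parameters. Conceptually this follows the straight-through-estimator idea commonly used for fake quantization; the sketch below illustrates that idea in NumPy under simplified assumptions (the actual FakeQuantWithMinMaxGrad kernel also accounts for num_bits and quant_delay):

import numpy as np

def fake_quant_grad_ste(dout, x, x_min, x_max):
    # Straight-through estimator: pass the gradient where x falls inside
    # the quantization range, zero it where x was clipped.
    inside = (x >= x_min) & (x <= x_max)
    return dout * inside.astype(dout.dtype)

dout = np.ones(5, np.float32)
x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0], np.float32)
print(fake_quant_grad_ste(dout, x, np.float32(-1.0), np.float32(1.0)))
# [0. 1. 1. 1. 0.]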

@@ -59,7 +59,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
                     LogSoftmax,
                     MaxPool,
                     AvgPool, Conv2DBackpropInput,
-                     MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6,
+                     MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6, HSwish, HSigmoid,
                     ResizeBilinear, Sigmoid,
                     SigmoidCrossEntropyWithLogits,
                     SmoothL1Loss, Softmax,
@@ -68,7 +68,8 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
                     TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl,
                     ApplyRMSProp, ApplyCenteredRMSProp)
from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey
+from . import _quant_ops
+from ._quant_ops import *

__all__ = [
    'TensorAdd',
@@ -138,6 +139,8 @@ __all__ = [
    'ReLU6',
    'Elu',
    'Sigmoid',
+    'HSwish',
+    'HSigmoid',
    'Tanh',
    'RandomChoiceWithMask',
    'ResizeBilinear',
@@ -241,4 +244,5 @@ __all__ = [
    "ApplyCenteredRMSProp"
]

+__all__.extend(_quant_ops.__all__)
__all__.sort()
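The wildcard import plus the __all__ extension make the primitives defined in _quant_ops (for example FakeQuantWithMinMax, used by the bprops above) part of the public mindspore.ops.operations namespace. A quick check, assuming a build that contains this commit:

from mindspore.ops import operations as P

# The quant primitives are now reachable alongside the regular nn ops.
assert hasattr(P, 'FakeQuantWithMinMax')
assert hasattr(P, 'HSwish') and hasattr(P, 'HSigmoid')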

@@ -805,6 +805,38 @@ class SigmoidGrad(PrimitiveWithInfer):
        return out


class HSigmoidGrad(PrimitiveWithInfer):
    """Gets the gradient of HSigmoid operation."""

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])

    def infer_shape(self, y_grad_shape, x_shape):
        return x_shape

    def infer_dtype(self, y_grad_dtype, x_dtype):
        validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
        validator.check_typename("x dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype


class HSwishGrad(PrimitiveWithInfer):
    """Gets the gradient of HSwish operation."""

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])

    def infer_shape(self, y_grad_shape, x_shape):
        return x_shape

    def infer_dtype(self, y_grad_dtype, x_dtype):
        validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
        validator.check_typename("x dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype


class SigmoidCrossEntropyWithLogitsGrad(PrimitiveWithInfer):
    """Computes the gradients of `SigmoidCrossEntropyWithLogits`."""

File diff suppressed because it is too large

@@ -207,7 +207,7 @@ class ReLU6(PrimitiveWithInfer):
class Elu(PrimitiveWithInfer):
-    """
+    r"""
    Computes exponential linear: `alpha * (exp(x) - 1)` if x < 0, `x` otherwise.

    The data type of input tensor should be float.
@@ -242,6 +242,40 @@ class Elu(PrimitiveWithInfer):
        return input_x
class HSwish(PrimitiveWithInfer):
    r"""
    Hard swish activation function.

    Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.

    Hard swish is defined as:

    .. math::
        \text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSwish.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['x'], outputs=['output'])

    def infer_shape(self, xshape):
        return xshape

    def infer_dtype(self, x_dtype):
        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
        validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype
class Sigmoid(PrimitiveWithInfer):
    r"""
    Sigmoid activation function.
@@ -258,6 +292,7 @@ class Sigmoid(PrimitiveWithInfer):
    Outputs:
        Tensor, with the same type and shape as the input_x.
    """

    @prim_attr_register
@@ -273,6 +308,40 @@ class Sigmoid(PrimitiveWithInfer):
        return input_x
class HSigmoid(PrimitiveWithInfer):
    r"""
    Hard sigmoid activation function.

    Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.

    Hard sigmoid is defined as:

    .. math::
        \text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{2 * x_{i} + 5}{10})),

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSigmoid.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['x'], outputs=['output'])

    def infer_shape(self, x_shape):
        return x_shape

    def infer_dtype(self, x_dtype):
        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
        validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype


class Tanh(PrimitiveWithInfer):
    r"""
    Tanh activation function.
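At the primitive level the new ops can also be called directly. A minimal sketch, assuming a backend that registers kernels for HSwish and HSigmoid (values are illustrative):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

x = Tensor(np.array([-4.0, -1.0, 0.0, 1.0, 4.0], np.float32))
print(P.HSwish()(x))    # x * ReLU6(x + 3) / 6, elementwise
print(P.HSigmoid()(x))  # max(0, min(1, (2x + 5) / 10)), elementwise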

@@ -27,11 +27,6 @@ def test_dense_none():
        nn.Dense(3, 2, None, None)


-def test_dense_invalid_activation():
-    with pytest.raises(KeyError):
-        nn.Dense(3, 2, activation='relu6')
-
-
@non_graph_engine
def test_dense_str_activation():
    dense = nn.Dense(1, 1, activation='relu')

@@ -51,11 +51,6 @@ def test_activation_empty():
    assert nn.get_activation('') is None


-def test_activation_invalid():
-    with pytest.raises(KeyError):
-        nn.get_activation('relu6')
-
-
# test softmax
def test_softmax_axis():
    layer = nn.Softmax(1)

@@ -68,11 +68,6 @@ def test_dense_none():
        nn.Dense(3, 2, None, None)


-def test_dense_invalid_activation():
-    with pytest.raises(KeyError):
-        nn.Dense(3, 2, activation='relu6')
-
-
def test_dense_str_activation():
    dense = nn.Dense(1, 1, activation='relu')
    assert isinstance(dense.activation, nn.ReLU)
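The removed tests asserted that 'relu6' was an unknown activation key; with 'relu6', 'hswish' and 'hsigmoid' now registered in _activation, that expectation no longer holds. A hedged sketch of the check that replaces it (test names here are illustrative, not part of this commit):

import mindspore.nn as nn

def test_dense_relu6_activation():
    # 'relu6' now resolves to a cell instead of raising KeyError.
    dense = nn.Dense(3, 2, activation='relu6')
    assert isinstance(dense.activation, nn.ReLU6)

def test_get_activation_hswish():
    assert isinstance(nn.get_activation('hswish'), nn.HSwish)
    assert isinstance(nn.get_activation('hsigmoid'), nn.HSigmoid)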
