Define quantization aware training frontend operators.

pull/212/head
chenzomi 5 years ago
parent c75f75a3e1
commit d64f662c76

@ -17,7 +17,7 @@ Layer.
The high-level components(Cells) used to construct the neural network.
"""
from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU
from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU, HSigmoid, HSwish
from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm
from .container import SequentialCell, CellList
from .conv import Conv2d, Conv2dTranspose
@ -26,8 +26,9 @@ from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot, ImageGradi
from .embedding import Embedding
from .pooling import AvgPool2d, MaxPool2d
__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'PReLU', 'get_activation', 'LeakyReLU',
'BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'ELU',
__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid',
'PReLU', 'get_activation', 'LeakyReLU', 'HSigmoid', 'HSwish', 'ELU',
'BatchNorm1d', 'BatchNorm2d', 'LayerNorm',
'SequentialCell', 'CellList',
'Conv2d', 'Conv2dTranspose',
'LSTM',

File diff suppressed because it is too large

@ -234,7 +234,7 @@ class Tanh(Cell):
class GELU(Cell):
"""
r"""
Gaussian error linear unit activation function.
Applies GELU function to each element of the input. The input is a Tensor with any valid shape.
@ -332,15 +332,74 @@ class PReLU(Cell):
return v
class HSwish(Cell):
r"""
Hard swish activation function.
Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.
Hard swish is defined as:
.. math::
\text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of Hswish.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
def __init__(self):
super(HSwish, self).__init__()
self.hswish = P.HSwish()
def construct(self, x):
return self.hswish(x)
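For reference, a minimal usage sketch of the new nn.HSwish cell (not part of the diff; assumes a backend that implements the P.HSwish kernel added in this PR):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Apply hard swish element-wise: x * relu6(x + 3) / 6
hswish = nn.HSwish()
x = Tensor(np.array([-1.0, -2.0, 0.0, 2.0, 1.0], np.float32))
output = hswish(x)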
class HSigmoid(Cell):
r"""
Hard sigmoid activation function.
Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.
Hard sigmoid is defined as:
.. math::
\text{hsigmoid}(x_{i}) = max(0, min(1, \frac{2 * x_{i} + 5}{10})),
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of HSigmoid.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
def __init__(self):
super(HSigmoid, self).__init__()
self.hsigmoid = P.HSigmoid()
def construct(self, x):
return self.hsigmoid(x)
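Likewise, an illustrative usage sketch for the new nn.HSigmoid cell (not part of the diff):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Apply hard sigmoid element-wise: max(0, min(1, (2x + 5) / 10))
hsigmoid = nn.HSigmoid()
x = Tensor(np.array([-2.5, 0.0, 2.5], np.float32))
output = hsigmoid(x)  # roughly [0.0, 0.5, 1.0]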
_activation = {
'softmax': Softmax,
'logsoftmax': LogSoftmax,
'relu': ReLU,
'relu6': ReLU6,
'tanh': Tanh,
'gelu': GELU,
'sigmoid': Sigmoid,
'prelu': PReLU,
'leakyrelu': LeakyReLU
'leakyrelu': LeakyReLU,
'hswish': HSwish,
'hsigmoid': HSigmoid,
}
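With the two new entries registered above, the string lookup helper should now resolve them; a quick sketch (assumes get_activation is re-exported by the nn package as shown in the __init__ diff):

import mindspore.nn as nn

act = nn.get_activation('hswish')
assert isinstance(act, nn.HSwish)  # 'hsigmoid' resolves to nn.HSigmoid the same way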

@ -172,6 +172,28 @@ def get_bprop_relu6(self):
return bprop
@bprop_getters.register(P.HSwish)
def get_bprop_hswish(self):
"""Grad definition for `HSwish` operation."""
input_grad = G.HSwishGrad()
def bprop(x, out, dout):
dx = input_grad(dout, x)
return (dx,)
return bprop
@bprop_getters.register(P.HSigmoid)
def get_bprop_hsigmoid(self):
"""Grad definition for `HSigmoid` operation."""
input_grad = G.HSigmoidGrad()
def bprop(x, out, dout):
dx = input_grad(dout, x)
return (dx,)
return bprop
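For intuition, the piecewise derivatives these grad kernels compute can be written down directly; a NumPy reference sketch (illustrative only, not the registered device kernels):

import numpy as np

def hswish_grad(dout, x):
    # d/dx [x * relu6(x + 3) / 6]: 0 for x < -3, (2x + 3)/6 on [-3, 3], 1 for x > 3
    dx = np.where(x < -3, 0.0, np.where(x > 3, 1.0, (2 * x + 3) / 6))
    return dout * dx

def hsigmoid_grad(dout, x):
    # d/dx max(0, min(1, (2x + 5) / 10)): 0.2 inside (-2.5, 2.5), 0 outside
    dx = np.where((x > -2.5) & (x < 2.5), 0.2, 0.0)
    return dout * dx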
@bprop_getters.register(P.Elu)
def get_bprop_elu(self):
"""Grad definition for `Elu` operation."""

@ -0,0 +1,82 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Generate bprop for aware quantization ops"""
from .. import operations as P
from .grad_base import bprop_getters
from ..composite.multitype_ops.zeros_like_impl import zeros_like
@bprop_getters.register(P.FakeQuantWithMinMax)
def get_bprop_fakequant_with_minmax(self):
"""Generate bprop for FakeQuantWithMinMax"""
op = P.FakeQuantWithMinMaxGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)
def bprop(x, x_min, x_max, out, dout):
dx = op(dout, x, x_min, x_max)
return dx, zeros_like(x_min), zeros_like(x_max)
return bprop
@bprop_getters.register(P.FakeQuantWithMinMaxPerChannel)
def get_bprop_fakequant_with_minmax_perchannel(self):
"""Generate bprop for FakeQuantWithMinMaxPerChannel"""
op = P.FakeQuantWithMinMaxPerChannelGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)
def bprop(x, x_min, x_max, out, dout):
dx = op(dout, x, x_min, x_max)
return dx, zeros_like(x_min), zeros_like(x_max)
return bprop
@bprop_getters.register(P.BatchNormFold)
def get_bprop_batchnorm_fold(self):
"""Generate bprop for BatchNormFold"""
op = P.BatchNormFoldGrad(self.epsilon, self.is_training, self.freeze_bn)
def bprop(x, mean, variance, global_step, out, dout):
dx = op(dout[0], dout[1], x, out[0], out[1], global_step)
return dx, zeros_like(mean), zeros_like(variance), zeros_like(global_step)
return bprop
@bprop_getters.register(P.CorrectionMul)
def get_bprop_correction_mul(self):
"""Generate bprop for CorrectionMul"""
grad = P.CorrectionMulGrad()
def bprop(x, batch_std, running_std, out, dout):
dx, d_batch_std = grad(dout, x, batch_std, running_std)
return dx, d_batch_std, zeros_like(running_std)
return bprop
@bprop_getters.register(P.BatchNormFold2)
def get_bprop_batchnorm_fold2(self):
"""Generate bprop for CorrectionAdd"""
op_f = P.BatchNormFold2Grad(freeze_bn=self.freeze_bn)
def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout):
d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(dout, x, gamma, batch_std, batch_mean, running_std,
running_mean, global_step)
return d_x, d_beta, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std), zeros_like(running_mean), \
zeros_like(global_step)
return bprop
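As context for the zeros_like returns above: fake-quantization backward passes are commonly implemented as a straight-through estimator, where the gradient flows unchanged for inputs inside the [min, max] range and is blocked outside, while the min/max tensors themselves receive zero gradients. A hedged NumPy sketch of that idea (not the actual FakeQuantWithMinMaxGrad kernel):

import numpy as np

def fake_quant_grad_ste(dout, x, x_min, x_max):
    # Straight-through estimator: pass the incoming gradient where x lies
    # inside the quantization range, zero it elsewhere.
    inside = (x >= x_min) & (x <= x_max)
    return dout * inside.astype(dout.dtype)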

@ -59,7 +59,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
LogSoftmax,
MaxPool,
AvgPool, Conv2DBackpropInput,
MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6,
MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6, HSwish, HSigmoid,
ResizeBilinear, Sigmoid,
SigmoidCrossEntropyWithLogits,
SmoothL1Loss, Softmax,
@ -68,7 +68,8 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl,
ApplyRMSProp, ApplyCenteredRMSProp)
from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey
from . import _quant_ops
from ._quant_ops import *
__all__ = [
'TensorAdd',
@ -138,6 +139,8 @@ __all__ = [
'ReLU6',
'Elu',
'Sigmoid',
'HSwish',
'HSigmoid',
'Tanh',
'RandomChoiceWithMask',
'ResizeBilinear',
@ -241,4 +244,5 @@ __all__ = [
"ApplyCenteredRMSProp"
]
__all__.extend(_quant_ops.__all__)
__all__.sort()

@ -805,6 +805,38 @@ class SigmoidGrad(PrimitiveWithInfer):
return out
class HSigmoidGrad(PrimitiveWithInfer):
"""Gets the gradient of HSigmoid operation."""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])
def infer_shape(self, y_grad_shape, x_shape):
return x_shape
def infer_dtype(self, y_grad_dtype, x_dtype):
validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
validator.check_typename("x dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
class HSwishGrad(PrimitiveWithInfer):
"""Gets the gradient of HSwish operation."""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])
def infer_shape(self, y_grad_shape, x_shape):
return x_shape
def infer_dtype(self, y_grad_dtype, x_dtype):
validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
validator.check_typename("x_ dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
class SigmoidCrossEntropyWithLogitsGrad(PrimitiveWithInfer):
"""Computes the gradients of `SigmoidCrossEntropyWithLogits`."""

File diff suppressed because it is too large

@ -207,7 +207,7 @@ class ReLU6(PrimitiveWithInfer):
class Elu(PrimitiveWithInfer):
"""
r"""
Computes exponential linear: `alpha * (exp(x) - 1)` if x < 0, `x` otherwise.
The data type of input tensor should be float.
@ -242,6 +242,40 @@ class Elu(PrimitiveWithInfer):
return input_x
class HSwish(PrimitiveWithInfer):
r"""
Hard swish activation function.
Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.
Hard swish is defined as:
.. math::
\text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of Hswish.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['x'], outputs=['output'])
def infer_shape(self, xshape):
return xshape
def infer_dtype(self, x_dtype):
validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
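A short call-level sketch of the new primitive (assumes the standard `from mindspore.ops import operations as P` import used elsewhere in the code base):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

hswish = P.HSwish()
x = Tensor(np.array([-4.0, -1.0, 0.0, 2.0, 4.0], np.float32))
y = hswish(x)  # element-wise x * relu6(x + 3) / 6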
class Sigmoid(PrimitiveWithInfer):
r"""
Sigmoid activation function.
@ -258,6 +292,7 @@ class Sigmoid(PrimitiveWithInfer):
Outputs:
Tensor, with the same type and shape as the input_x.
"""
@prim_attr_register
@ -273,6 +308,40 @@ class Sigmoid(PrimitiveWithInfer):
return input_x
class HSigmoid(PrimitiveWithInfer):
r"""
Hard sigmoid activation function.
Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.
Hard sigmoid is defined as:
.. math::
\text{hsigmoid}(x_{i}) = max(0, min(1, \frac{2 * x_{i} + 5}{10})),
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of HSigmoid.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['x'], outputs=['output'])
def infer_shape(self, x_shape):
return x_shape
def infer_dtype(self, x_dtype):
validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
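And the analogous sketch for P.HSigmoid (illustrative; expected values follow the (2x + 5)/10 formula above):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

hsigmoid = P.HSigmoid()
x = Tensor(np.array([-2.5, 0.0, 2.5], np.float32))
y = hsigmoid(x)  # roughly [0.0, 0.5, 1.0]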
class Tanh(PrimitiveWithInfer):
r"""
Tanh activation function.

@ -27,11 +27,6 @@ def test_dense_none():
nn.Dense(3, 2, None, None)
def test_dense_invalid_activation():
with pytest.raises(KeyError):
nn.Dense(3, 2, activation='relu6')
@non_graph_engine
def test_dense_str_activation():
dense = nn.Dense(1, 1, activation='relu')

@ -51,11 +51,6 @@ def test_activation_empty():
assert nn.get_activation('') is None
def test_activation_invalid():
with pytest.raises(KeyError):
nn.get_activation('relu6')
# test softmax
def test_softmax_axis():
layer = nn.Softmax(1)

@ -68,11 +68,6 @@ def test_dense_none():
nn.Dense(3, 2, None, None)
def test_dense_invalid_activation():
with pytest.raises(KeyError):
nn.Dense(3, 2, activation='relu6')
def test_dense_str_activation():
dense = nn.Dense(1, 1, activation='relu')
assert isinstance(dense.activation, nn.ReLU)
