Define quantization aware training frontend operators.

pull/212/head
chenzomi 5 years ago
parent c75f75a3e1
commit d64f662c76

@@ -17,7 +17,7 @@ Layer.
The high-level components(Cells) used to construct the neural network.
"""
-from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU
+from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU, HSigmoid, HSwish
from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm
from .container import SequentialCell, CellList
from .conv import Conv2d, Conv2dTranspose
@@ -26,8 +26,9 @@ from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot, ImageGradi
from .embedding import Embedding
from .pooling import AvgPool2d, MaxPool2d

-__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'PReLU', 'get_activation', 'LeakyReLU',
-           'BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'ELU',
+__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid',
+           'PReLU', 'get_activation', 'LeakyReLU', 'HSigmoid', 'HSwish', 'ELU',
+           'BatchNorm1d', 'BatchNorm2d', 'LayerNorm',
           'SequentialCell', 'CellList',
           'Conv2d', 'Conv2dTranspose',
           'LSTM',

File diff suppressed because it is too large

@@ -234,7 +234,7 @@ class Tanh(Cell):
class GELU(Cell):
-    """
+    r"""
    Gaussian error linear unit activation function.

    Applies GELU function to each element of the input. The input is a Tensor with any valid shape.
@@ -332,15 +332,74 @@ class PReLU(Cell):
        return v
class HSwish(Cell):
    r"""
    Hard swish activation function.

    Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.

    Hard swish is defined as:

    .. math::
        \text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSwish.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """
    def __init__(self):
        super(HSwish, self).__init__()
        self.hswish = P.HSwish()

    def construct(self, x):
        return self.hswish(x)


class HSigmoid(Cell):
    r"""
    Hard sigmoid activation function.

    Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.

    Hard sigmoid is defined as:

    .. math::
        \text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{2 * x_{i} + 5}{10})),

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSigmoid.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """
    def __init__(self):
        super(HSigmoid, self).__init__()
        self.hsigmoid = P.HSigmoid()

    def construct(self, x):
        return self.hsigmoid(x)
_activation = {
    'softmax': Softmax,
    'logsoftmax': LogSoftmax,
    'relu': ReLU,
+    'relu6': ReLU6,
    'tanh': Tanh,
    'gelu': GELU,
    'sigmoid': Sigmoid,
    'prelu': PReLU,
-    'leakyrelu': LeakyReLU
+    'leakyrelu': LeakyReLU,
+    'hswish': HSwish,
+    'hsigmoid': HSigmoid,
}
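With the cells and the lookup keys above in place, HSwish and HSigmoid can be used like any other activation. A minimal usage sketch, assuming a MindSpore build that includes this commit and a backend that provides kernels for these ops (the input values are illustrative):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Build the cells directly ...
hswish = nn.HSwish()
hsigmoid = nn.HSigmoid()

# ... or resolve them through the lowercase keys added to _activation above.
assert isinstance(nn.get_activation('hswish'), nn.HSwish)
assert isinstance(nn.get_activation('hsigmoid'), nn.HSigmoid)

x = Tensor(np.array([-4.0, -1.0, 0.0, 1.0, 4.0], np.float32))
print(hswish(x))    # elementwise x * relu6(x + 3) / 6
print(hsigmoid(x))  # elementwise clamp(0.2 * x + 0.5, 0, 1)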

@@ -172,6 +172,28 @@ def get_bprop_relu6(self):
    return bprop


@bprop_getters.register(P.HSwish)
def get_bprop_hswish(self):
    """Grad definition for `HSwish` operation."""
    input_grad = G.HSwishGrad()

    def bprop(x, out, dout):
        dx = input_grad(dout, x)
        return (dx,)

    return bprop


@bprop_getters.register(P.HSigmoid)
def get_bprop_hsigmoid(self):
    """Grad definition for `HSigmoid` operation."""
    input_grad = G.HSigmoidGrad()

    def bprop(x, out, dout):
        dx = input_grad(dout, x)
        return (dx,)

    return bprop


@bprop_getters.register(P.Elu)
def get_bprop_elu(self):
    """Grad definition for `Elu` operation."""

@@ -0,0 +1,82 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Generate bprop for aware quantization ops"""
from .. import operations as P
from .grad_base import bprop_getters
from ..composite.multitype_ops.zeros_like_impl import zeros_like

@bprop_getters.register(P.FakeQuantWithMinMax)
def get_bprop_fakequant_with_minmax(self):
    """Generate bprop for FakeQuantWithMinMax"""
    op = P.FakeQuantWithMinMaxGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)

    def bprop(x, x_min, x_max, out, dout):
        dx = op(dout, x, x_min, x_max)
        return dx, zeros_like(x_min), zeros_like(x_max)

    return bprop


@bprop_getters.register(P.FakeQuantWithMinMaxPerChannel)
def get_bprop_fakequant_with_minmax_perchannel(self):
    """Generate bprop for FakeQuantWithMinMaxPerChannel"""
    op = P.FakeQuantWithMinMaxPerChannelGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)

    def bprop(x, x_min, x_max, out, dout):
        dx = op(dout, x, x_min, x_max)
        return dx, zeros_like(x_min), zeros_like(x_max)

    return bprop


@bprop_getters.register(P.BatchNormFold)
def get_bprop_batchnorm_fold(self):
    """Generate bprop for BatchNormFold"""
    op = P.BatchNormFoldGrad(self.epsilon, self.is_training, self.freeze_bn)

    def bprop(x, mean, variance, global_step, out, dout):
        dx = op(dout[0], dout[1], x, out[0], out[1], global_step)
        return dx, zeros_like(mean), zeros_like(variance), zeros_like(global_step)

    return bprop


@bprop_getters.register(P.CorrectionMul)
def get_bprop_correction_mul(self):
    """Generate bprop for CorrectionMul"""
    grad = P.CorrectionMulGrad()

    def bprop(x, batch_std, running_std, out, dout):
        dx, d_batch_std = grad(dout, x, batch_std, running_std)
        return dx, d_batch_std, zeros_like(running_std)

    return bprop


@bprop_getters.register(P.BatchNormFold2)
def get_bprop_batchnorm_fold2(self):
    """Generate bprop for BatchNormFold2"""
    op_f = P.BatchNormFold2Grad(freeze_bn=self.freeze_bn)

    def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout):
        d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(dout, x, gamma, batch_std, batch_mean, running_std,
                                                               running_mean, global_step)
        return d_x, d_beta, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std), zeros_like(running_mean), \
               zeros_like(global_step)

    return bprop
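Each fake-quant bprop returns the kernel's dx for the data input and zeros for the min/max parameters. Conceptually this follows the straight-through-estimator idea commonly used for fake quantization; the sketch below illustrates that idea in NumPy under simplified assumptions (the actual FakeQuantWithMinMaxGrad kernel also accounts for num_bits and quant_delay):

import numpy as np

def fake_quant_grad_ste(dout, x, x_min, x_max):
    # Straight-through estimator: pass the gradient where x falls inside
    # the quantization range, zero it where x was clipped.
    inside = (x >= x_min) & (x <= x_max)
    return dout * inside.astype(dout.dtype)

dout = np.ones(5, np.float32)
x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0], np.float32)
print(fake_quant_grad_ste(dout, x, np.float32(-1.0), np.float32(1.0)))
# [0. 1. 1. 1. 0.]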

@@ -59,7 +59,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
                     LogSoftmax,
                     MaxPool,
                     AvgPool, Conv2DBackpropInput,
-                     MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6,
+                     MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6, HSwish, HSigmoid,
                     ResizeBilinear, Sigmoid,
                     SigmoidCrossEntropyWithLogits,
                     SmoothL1Loss, Softmax,
@@ -68,7 +68,8 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
                     TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl,
                     ApplyRMSProp, ApplyCenteredRMSProp)
from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey
+from . import _quant_ops
+from ._quant_ops import *

__all__ = [
    'TensorAdd',
@@ -138,6 +139,8 @@ __all__ = [
    'ReLU6',
    'Elu',
    'Sigmoid',
+    'HSwish',
+    'HSigmoid',
    'Tanh',
    'RandomChoiceWithMask',
    'ResizeBilinear',
@@ -241,4 +244,5 @@ __all__ = [
    "ApplyCenteredRMSProp"
]

+__all__.extend(_quant_ops.__all__)
__all__.sort()
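The wildcard import plus the __all__ extension make the primitives defined in _quant_ops (for example FakeQuantWithMinMax, used by the bprops above) part of the public mindspore.ops.operations namespace. A quick check, assuming a build that contains this commit:

from mindspore.ops import operations as P

# The quant primitives are now reachable alongside the regular nn ops.
assert hasattr(P, 'FakeQuantWithMinMax')
assert hasattr(P, 'HSwish') and hasattr(P, 'HSigmoid')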

@@ -805,6 +805,38 @@ class SigmoidGrad(PrimitiveWithInfer):
        return out


class HSigmoidGrad(PrimitiveWithInfer):
    """Gets the gradient of HSigmoid operation."""

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])

    def infer_shape(self, y_grad_shape, x_shape):
        return x_shape

    def infer_dtype(self, y_grad_dtype, x_dtype):
        validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
        validator.check_typename("x dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype


class HSwishGrad(PrimitiveWithInfer):
    """Gets the gradient of HSwish operation."""

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])

    def infer_shape(self, y_grad_shape, x_shape):
        return x_shape

    def infer_dtype(self, y_grad_dtype, x_dtype):
        validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
        validator.check_typename("x dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype


class SigmoidCrossEntropyWithLogitsGrad(PrimitiveWithInfer):
    """Computes the gradients of `SigmoidCrossEntropyWithLogits`."""

File diff suppressed because it is too large

@@ -207,7 +207,7 @@ class ReLU6(PrimitiveWithInfer):
class Elu(PrimitiveWithInfer):
-    """
+    r"""
    Computes exponential linear: `alpha * (exp(x) - 1)` if x < 0, `x` otherwise.

    The data type of input tensor should be float.
@@ -242,6 +242,40 @@ class Elu(PrimitiveWithInfer):
        return input_x
class HSwish(PrimitiveWithInfer):
    r"""
    Hard swish activation function.

    Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.

    Hard swish is defined as:

    .. math::
        \text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSwish.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['x'], outputs=['output'])

    def infer_shape(self, xshape):
        return xshape

    def infer_dtype(self, x_dtype):
        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
        validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype
class Sigmoid(PrimitiveWithInfer):
    r"""
    Sigmoid activation function.
@@ -258,6 +292,7 @@ class Sigmoid(PrimitiveWithInfer):
    Outputs:
        Tensor, with the same type and shape as the input_x.
    """

    @prim_attr_register
@@ -273,6 +308,40 @@ class Sigmoid(PrimitiveWithInfer):
        return input_x
class HSigmoid(PrimitiveWithInfer):
    r"""
    Hard sigmoid activation function.

    Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.

    Hard sigmoid is defined as:

    .. math::
        \text{hsigmoid}(x_{i}) = \max(0, \min(1, \frac{2 * x_{i} + 5}{10})),

    where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.

    Inputs:
        - **input_data** (Tensor) - The input of HSigmoid.

    Outputs:
        Tensor, with the same type and shape as the `input_data`.
    """

    @prim_attr_register
    def __init__(self):
        self.init_prim_io_names(inputs=['x'], outputs=['output'])

    def infer_shape(self, x_shape):
        return x_shape

    def infer_dtype(self, x_dtype):
        validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
        validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
        return x_dtype


class Tanh(PrimitiveWithInfer):
    r"""
    Tanh activation function.
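At the primitive level the new ops can also be called directly. A minimal sketch, assuming a backend that registers kernels for HSwish and HSigmoid (values are illustrative):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

x = Tensor(np.array([-4.0, -1.0, 0.0, 1.0, 4.0], np.float32))
print(P.HSwish()(x))    # x * ReLU6(x + 3) / 6, elementwise
print(P.HSigmoid()(x))  # max(0, min(1, (2x + 5) / 10)), elementwise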

@@ -27,11 +27,6 @@ def test_dense_none():
        nn.Dense(3, 2, None, None)


-def test_dense_invalid_activation():
-    with pytest.raises(KeyError):
-        nn.Dense(3, 2, activation='relu6')
-
-
@non_graph_engine
def test_dense_str_activation():
    dense = nn.Dense(1, 1, activation='relu')

@@ -51,11 +51,6 @@ def test_activation_empty():
    assert nn.get_activation('') is None


-def test_activation_invalid():
-    with pytest.raises(KeyError):
-        nn.get_activation('relu6')
-
-
# test softmax
def test_softmax_axis():
    layer = nn.Softmax(1)

@@ -68,11 +68,6 @@ def test_dense_none():
        nn.Dense(3, 2, None, None)


-def test_dense_invalid_activation():
-    with pytest.raises(KeyError):
-        nn.Dense(3, 2, activation='relu6')
-
-
def test_dense_str_activation():
    dense = nn.Dense(1, 1, activation='relu')
    assert isinstance(dense.activation, nn.ReLU)
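The removed tests asserted that 'relu6' was an unknown activation key; with 'relu6', 'hswish' and 'hsigmoid' now registered in _activation, that expectation no longer holds. A hedged sketch of the check that replaces it (test names here are illustrative, not part of this commit):

import mindspore.nn as nn

def test_dense_relu6_activation():
    # 'relu6' now resolves to a cell instead of raising KeyError.
    dense = nn.Dense(3, 2, activation='relu6')
    assert isinstance(dense.activation, nn.ReLU6)

def test_get_activation_hswish():
    assert isinstance(nn.get_activation('hswish'), nn.HSwish)
    assert isinstance(nn.get_activation('hsigmoid'), nn.HSigmoid)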
