Define quantization aware training frontend operators.

pull/212/head
chenzomi 5 years ago
parent c75f75a3e1
commit d64f662c76

@ -17,7 +17,7 @@ Layer.
The high-level components(Cells) used to construct the neural network.
"""
from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU
from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU, HSigmoid, HSwish
from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm
from .container import SequentialCell, CellList
from .conv import Conv2d, Conv2dTranspose
@ -26,8 +26,9 @@ from .basic import Dropout, Flatten, Dense, ClipByNorm, Norm, OneHot, ImageGradi
from .embedding import Embedding
from .pooling import AvgPool2d, MaxPool2d
__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'PReLU', 'get_activation', 'LeakyReLU',
'BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'ELU',
__all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid',
'PReLU', 'get_activation', 'LeakyReLU', 'HSigmoid', 'HSwish', 'ELU',
'BatchNorm1d', 'BatchNorm2d', 'LayerNorm',
'SequentialCell', 'CellList',
'Conv2d', 'Conv2dTranspose',
'LSTM',

File diff suppressed because it is too large

@ -234,7 +234,7 @@ class Tanh(Cell):
class GELU(Cell):
"""
r"""
Gaussian error linear unit activation function.
Applies GELU function to each element of the input. The input is a Tensor with any valid shape.
@ -332,15 +332,74 @@ class PReLU(Cell):
return v
class HSwish(Cell):
r"""
Hard swish activation function.
Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.
Hard swish is defined as:
.. math::
\text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of Hswish.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
def __init__(self):
super(HSwish, self).__init__()
self.hswish = P.HSwish()
def construct(self, x):
return self.hswish(x)
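For reference, a minimal usage sketch of the new nn.HSwish cell (not part of the diff; assumes a backend that implements the P.HSwish kernel added in this PR):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Apply hard swish element-wise: x * relu6(x + 3) / 6
hswish = nn.HSwish()
x = Tensor(np.array([-1.0, -2.0, 0.0, 2.0, 1.0], np.float32))
output = hswish(x)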
class HSigmoid(Cell):
r"""
Hard sigmoid activation function.
Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.
Hard sigmoid is defined as:
.. math::
\text{hsigmoid}(x_{i}) = max(0, min(1, \frac{2 * x_{i} + 5}{10})),
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of HSigmoid.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
def __init__(self):
super(HSigmoid, self).__init__()
self.hsigmoid = P.HSigmoid()
def construct(self, x):
return self.hsigmoid(x)
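Likewise, an illustrative usage sketch for the new nn.HSigmoid cell (not part of the diff):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Apply hard sigmoid element-wise: max(0, min(1, (2x + 5) / 10))
hsigmoid = nn.HSigmoid()
x = Tensor(np.array([-2.5, 0.0, 2.5], np.float32))
output = hsigmoid(x)  # roughly [0.0, 0.5, 1.0]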
_activation = {
'softmax': Softmax,
'logsoftmax': LogSoftmax,
'relu': ReLU,
'relu6': ReLU6,
'tanh': Tanh,
'gelu': GELU,
'sigmoid': Sigmoid,
'prelu': PReLU,
'leakyrelu': LeakyReLU
'leakyrelu': LeakyReLU,
'hswish': HSwish,
'hsigmoid': HSigmoid,
}
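With the two new entries registered above, the string lookup helper should now resolve them; a quick sketch (assumes get_activation is re-exported by the nn package as shown in the __init__ diff):

import mindspore.nn as nn

act = nn.get_activation('hswish')
assert isinstance(act, nn.HSwish)  # 'hsigmoid' resolves to nn.HSigmoid the same way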

@ -172,6 +172,28 @@ def get_bprop_relu6(self):
return bprop
@bprop_getters.register(P.HSwish)
def get_bprop_hswish(self):
"""Grad definition for `HSwish` operation."""
input_grad = G.HSwishGrad()
def bprop(x, out, dout):
dx = input_grad(dout, x)
return (dx,)
return bprop
@bprop_getters.register(P.HSigmoid)
def get_bprop_hsigmoid(self):
"""Grad definition for `HSigmoid` operation."""
input_grad = G.HSigmoidGrad()
def bprop(x, out, dout):
dx = input_grad(dout, x)
return (dx,)
return bprop
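For intuition, the piecewise derivatives these grad kernels compute can be written down directly; a NumPy reference sketch (illustrative only, not the registered device kernels):

import numpy as np

def hswish_grad(dout, x):
    # d/dx [x * relu6(x + 3) / 6]: 0 for x < -3, (2x + 3)/6 on [-3, 3], 1 for x > 3
    dx = np.where(x < -3, 0.0, np.where(x > 3, 1.0, (2 * x + 3) / 6))
    return dout * dx

def hsigmoid_grad(dout, x):
    # d/dx max(0, min(1, (2x + 5) / 10)): 0.2 inside (-2.5, 2.5), 0 outside
    dx = np.where((x > -2.5) & (x < 2.5), 0.2, 0.0)
    return dout * dx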
@bprop_getters.register(P.Elu)
def get_bprop_elu(self):
"""Grad definition for `Elu` operation."""

@ -0,0 +1,82 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Generate bprop for aware quantization ops"""
from .. import operations as P
from .grad_base import bprop_getters
from ..composite.multitype_ops.zeros_like_impl import zeros_like
@bprop_getters.register(P.FakeQuantWithMinMax)
def get_bprop_fakequant_with_minmax(self):
"""Generate bprop for FakeQuantWithMinMax"""
op = P.FakeQuantWithMinMaxGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)
def bprop(x, x_min, x_max, out, dout):
dx = op(dout, x, x_min, x_max)
return dx, zeros_like(x_min), zeros_like(x_max)
return bprop
@bprop_getters.register(P.FakeQuantWithMinMaxPerChannel)
def get_bprop_fakequant_with_minmax_perchannel(self):
"""Generate bprop for FakeQuantWithMinMaxPerChannel"""
op = P.FakeQuantWithMinMaxPerChannelGrad(num_bits=self.num_bits, quant_delay=self.quant_delay)
def bprop(x, x_min, x_max, out, dout):
dx = op(dout, x, x_min, x_max)
return dx, zeros_like(x_min), zeros_like(x_max)
return bprop
@bprop_getters.register(P.BatchNormFold)
def get_bprop_batchnorm_fold(self):
"""Generate bprop for BatchNormFold"""
op = P.BatchNormFoldGrad(self.epsilon, self.is_training, self.freeze_bn)
def bprop(x, mean, variance, global_step, out, dout):
dx = op(dout[0], dout[1], x, out[0], out[1], global_step)
return dx, zeros_like(mean), zeros_like(variance), zeros_like(global_step)
return bprop
@bprop_getters.register(P.CorrectionMul)
def get_bprop_correction_mul(self):
"""Generate bprop for CorrectionMul"""
grad = P.CorrectionMulGrad()
def bprop(x, batch_std, running_std, out, dout):
dx, d_batch_std = grad(dout, x, batch_std, running_std)
return dx, d_batch_std, zeros_like(running_std)
return bprop
@bprop_getters.register(P.BatchNormFold2)
def get_bprop_batchnorm_fold2(self):
"""Generate bprop for CorrectionAdd"""
op_f = P.BatchNormFold2Grad(freeze_bn=self.freeze_bn)
def bprop(x, beta, gamma, batch_std, batch_mean, running_std, running_mean, global_step, out, dout):
d_batch_std, d_batch_mean, d_beta, d_gamma, d_x = op_f(dout, x, gamma, batch_std, batch_mean, running_std,
running_mean, global_step)
return d_x, d_beta, d_gamma, d_batch_std, d_batch_mean, zeros_like(running_std), zeros_like(running_mean), \
zeros_like(global_step)
return bprop
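As context for the zeros_like returns above: fake-quantization backward passes are commonly implemented as a straight-through estimator, where the gradient flows unchanged for inputs inside the [min, max] range and is blocked outside, while the min/max tensors themselves receive zero gradients. A hedged NumPy sketch of that idea (not the actual FakeQuantWithMinMaxGrad kernel):

import numpy as np

def fake_quant_grad_ste(dout, x, x_min, x_max):
    # Straight-through estimator: pass the incoming gradient where x lies
    # inside the quantization range, zero it elsewhere.
    inside = (x >= x_min) & (x <= x_max)
    return dout * inside.astype(dout.dtype)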

@ -59,7 +59,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
LogSoftmax,
MaxPool,
AvgPool, Conv2DBackpropInput,
MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6,
MaxPoolWithArgmax, OneHot, Pad, PReLU, ReLU, ReLU6, HSwish, HSigmoid,
ResizeBilinear, Sigmoid,
SigmoidCrossEntropyWithLogits,
SmoothL1Loss, Softmax,
@ -68,7 +68,8 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl,
ApplyRMSProp, ApplyCenteredRMSProp)
from .other_ops import Assign, IOU, BoundingBoxDecode, BoundingBoxEncode, CheckValid, MakeRefKey
from . import _quant_ops
from ._quant_ops import *
__all__ = [
'TensorAdd',
@ -138,6 +139,8 @@ __all__ = [
'ReLU6',
'Elu',
'Sigmoid',
'HSwish',
'HSigmoid',
'Tanh',
'RandomChoiceWithMask',
'ResizeBilinear',
@ -241,4 +244,5 @@ __all__ = [
"ApplyCenteredRMSProp"
]
__all__.extend(_quant_ops.__all__)
__all__.sort()

@ -805,6 +805,38 @@ class SigmoidGrad(PrimitiveWithInfer):
return out
class HSigmoidGrad(PrimitiveWithInfer):
"""Gets the gradient of HSigmoid operation."""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])
def infer_shape(self, y_grad_shape, x_shape):
return x_shape
def infer_dtype(self, y_grad_dtype, x_dtype):
validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
validator.check_typename("x dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
class HSwishGrad(PrimitiveWithInfer):
"""Gets the gradient of HSwish operation."""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['y_grad', 'x'], outputs=['output'])
def infer_shape(self, y_grad_shape, x_shape):
return x_shape
def infer_dtype(self, y_grad_dtype, x_dtype):
validator.check_typename("y_grad dtype", y_grad_dtype, (mstype.float16, mstype.float32))
validator.check_typename("x_ dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
class SigmoidCrossEntropyWithLogitsGrad(PrimitiveWithInfer):
"""Computes the gradients of `SigmoidCrossEntropyWithLogits`."""

File diff suppressed because it is too large

@ -207,7 +207,7 @@ class ReLU6(PrimitiveWithInfer):
class Elu(PrimitiveWithInfer):
"""
r"""
Computes exponential linear: `alpha * (exp(x) - 1)` if x < 0, `x` otherwise.
The data type of input tensor should be float.
@ -242,6 +242,40 @@ class Elu(PrimitiveWithInfer):
return input_x
class HSwish(PrimitiveWithInfer):
r"""
Hard swish activation function.
Applies hswish-type activation element-wise. The input is a Tensor with any valid shape.
Hard swish is defined as:
.. math::
\text{hswish}(x_{i}) = x_{i} * \frac{ReLU6(x_{i} + 3)}{6},
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of Hswish.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['x'], outputs=['output'])
def infer_shape(self, xshape):
return xshape
def infer_dtype(self, x_dtype):
validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
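A short call-level sketch of the new primitive (assumes the standard `from mindspore.ops import operations as P` import used elsewhere in the code base):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

hswish = P.HSwish()
x = Tensor(np.array([-4.0, -1.0, 0.0, 2.0, 4.0], np.float32))
y = hswish(x)  # element-wise x * relu6(x + 3) / 6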
class Sigmoid(PrimitiveWithInfer):
r"""
Sigmoid activation function.
@ -258,6 +292,7 @@ class Sigmoid(PrimitiveWithInfer):
Outputs:
Tensor, with the same type and shape as the input_x.
"""
@prim_attr_register
@ -273,6 +308,40 @@ class Sigmoid(PrimitiveWithInfer):
return input_x
class HSigmoid(PrimitiveWithInfer):
r"""
Hard sigmoid activation function.
Applies hard sigmoid activation element-wise. The input is a Tensor with any valid shape.
Hard sigmoid is defined as:
.. math::
\text{hsigmoid}(x_{i}) = max(0, min(1, \frac{2 * x_{i} + 5}{10})),
where :math:`x_{i}` is the :math:`i`-th slice along the given dim of the input Tensor.
Inputs:
- **input_data** (Tensor) - The input of HSigmoid.
Outputs:
Tensor, with the same type and shape as the `input_data`.
"""
@prim_attr_register
def __init__(self):
self.init_prim_io_names(inputs=['x'], outputs=['output'])
def infer_shape(self, x_shape):
return x_shape
def infer_dtype(self, x_dtype):
validator.check_subclass("x_dtype", x_dtype, mstype.tensor)
validator.check_typename("x_dtype", x_dtype, (mstype.float16, mstype.float32))
return x_dtype
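And the analogous sketch for P.HSigmoid (illustrative; expected values follow the (2x + 5)/10 formula above):

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

hsigmoid = P.HSigmoid()
x = Tensor(np.array([-2.5, 0.0, 2.5], np.float32))
y = hsigmoid(x)  # roughly [0.0, 0.5, 1.0]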
class Tanh(PrimitiveWithInfer):
r"""
Tanh activation function.

@ -27,11 +27,6 @@ def test_dense_none():
nn.Dense(3, 2, None, None)
def test_dense_invalid_activation():
with pytest.raises(KeyError):
nn.Dense(3, 2, activation='relu6')
@non_graph_engine
def test_dense_str_activation():
dense = nn.Dense(1, 1, activation='relu')

@ -51,11 +51,6 @@ def test_activation_empty():
assert nn.get_activation('') is None
def test_activation_invalid():
with pytest.raises(KeyError):
nn.get_activation('relu6')
# test softmax
def test_softmax_axis():
layer = nn.Softmax(1)

@ -68,11 +68,6 @@ def test_dense_none():
nn.Dense(3, 2, None, None)
def test_dense_invalid_activation():
with pytest.raises(KeyError):
nn.Dense(3, 2, activation='relu6')
def test_dense_str_activation():
dense = nn.Dense(1, 1, activation='relu')
assert isinstance(dense.activation, nn.ReLU)
