Implemented AddQuantDequantPass in imperative quantization. (#26692)

* Implemented AddQuantDequantPass in imperative quantization.

* Supported LeakyReLU Quantization

* Made changes to meet the coverage rate requirement.

* Renamed the AddQuantDequant test file.

* Implemented more Quantized NoWeightLayers.

* Fixed the problem that the loss could not be aligned between static and dynamic model quantization, and added Swish as a supported quantized layer in imperative quantization.

* Removed noweight_list.

* Supported 2.0 APIs such as Pool2D and ReLU.
huangxu96 committed by GitHub
parent a60f17b89d
commit ee623bff64

@@ -86,7 +86,7 @@ class ImperativeQuantAware(object):
'moving_average_abs_max', the static quantization scale will be calculated
during training and used in inference.
moving_rate(float): the parameter for 'moving_average_abs_max' quantization.
quantizable_op_type(list[str]): List the type of layers that will be quantized.
quantizable_layer_type(list[str]): List the type of layers that will be quantized.
Default is ['Conv2D', 'Linear']. The quantizable_op_type in
QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess
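
For context, a minimal usage sketch of how the renamed quantizable_layer_type argument could be passed from user code; the import path, the LeNet helper, and the exact keyword names below are assumptions based on the Paddle 2.0-era dygraph API, not part of this diff.

# Hedged usage sketch (not part of this diff): enabling no-weight layers
# such as ReLU and Pool2D via quantizable_layer_type.
import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

model = paddle.vision.models.LeNet()  # any dygraph model (assumed helper)
quanter = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max',
    quantizable_layer_type=['Conv2D', 'Linear', 'ReLU', 'Pool2D'])
quanter.quantize(model)  # replaces supported layers with their quantized counterparts in place
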
@@ -229,7 +229,17 @@ class ImperativeQuantAware(object):
"'abs_max' or 'moving_average_abs_max' or 'channel_wise_abs_max' now."
% (str(weight_quantize_type)))
self._quant_layers_map = {'Conv2D': Conv2D, 'Linear': Linear}
self._quant_layers_map = {
'Conv2D': Conv2D,
'Linear': Linear,
'Pool2D': Pool2D,
'ReLU': ReLU,
'LeakyReLU': LeakyReLU,
'ReLU6': ReLU6,
'Softmax': Softmax,
'Tanh': Tanh,
'Swish': Swish
}
self._quantizable_layer_type = tuple(
self._quant_layers_map[layer]
if layer in self._quant_layers_map else layer
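
The comprehension above resolves each configured entry: a known layer-type name is looked up in _quant_layers_map, while anything else (e.g. a layer class supplied directly) is kept as-is. A standalone sketch of that resolution, with an illustrative map trimmed to a few entries and paddle.nn import paths assumed:

# Standalone sketch of the name-or-class resolution (not part of this diff).
from paddle.nn import Conv2D, Linear, ReLU, LeakyReLU

_quant_layers_map = {'Conv2D': Conv2D, 'Linear': Linear, 'ReLU': ReLU}

def resolve_quantizable_layer_type(quantizable_layer_type):
    # String keys resolve through the map; layer classes pass through unchanged.
    return tuple(_quant_layers_map[t] if t in _quant_layers_map else t
                 for t in quantizable_layer_type)

print(resolve_quantizable_layer_type(['Conv2D', 'Linear', ReLU, LeakyReLU]))
# -> a tuple of layer classes, ready to be used as the quantizable type filter
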
@@ -262,7 +272,6 @@ class ImperativeQuantAware(object):
for i in range(len(scopes) - 1):
obj = getattr(parent, scopes[i])
parent = obj
quant_layer = self._get_quantized_counterpart(layer)
setattr(quant_layer, "layer_name", layer.full_name())
setattr(obj, target, quant_layer)
@@ -285,7 +294,12 @@ class ImperativeQuantAware(object):
layer.full_name()))
sys.exit(-1)
quantized_layer = quant_nn.__dict__[quantized_counterpart[index]](
layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear']
if quantized_counterpart[index] not in layer_with_weight:
quant_layer_class_name = 'QuantizedNoweightLayer'
else:
quant_layer_class_name = quantized_counterpart[index]
quantized_layer = quant_nn.__dict__[quant_layer_class_name](
layer, self._weight_bits, self._activation_bits, self._moving_rate,
self._weight_quantize_type, self._activation_quantize_type,
self._weight_pre_layer, self._act_pre_layer,
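
A simplified standalone sketch of the dispatch introduced above: layers with weights keep their dedicated quantized counterparts, and every other supported layer is wrapped by the generic QuantizedNoweightLayer. The helper name below is illustrative; the real code instantiates the chosen class from quant_nn.__dict__.

# Simplified sketch of the counterpart selection (not part of this diff).
LAYERS_WITH_WEIGHT = ['QuantizedConv2D', 'QuantizedLinear']

def pick_quant_class_name(quantized_counterpart_name):
    # Conv2D/Linear map to their dedicated quantized classes; no-weight
    # layers (ReLU, Pool2D, Softmax, ...) fall back to QuantizedNoweightLayer.
    if quantized_counterpart_name in LAYERS_WITH_WEIGHT:
        return quantized_counterpart_name
    return 'QuantizedNoweightLayer'

assert pick_quant_class_name('QuantizedConv2D') == 'QuantizedConv2D'
assert pick_quant_class_name('QuantizedReLU') == 'QuantizedNoweightLayer'
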

@@ -24,9 +24,9 @@ from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.nn import functional as F
__all__ = [
'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D',
'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax',
'MovingAverageAbsMaxScale'
'FakeQuantMovingAverage', 'FakeQuantAbsMax',
'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear',
'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale'
]
@@ -478,6 +478,30 @@ class QuantizedLinear(layers.Layer):
return out
class QuantizedNoweightLayer(layers.Layer):
def __init__(self,
layer,
weight_bits=8,
activation_bits=8,
moving_rate=0.9,
*args,
**kwargs):
super(QuantizedNoweightLayer, self).__init__()
self._layer = layer
self._fake_quant_input = _get_fake_quant_type(
'moving_average_abs_max',
name=layer.full_name(),
moving_rate=moving_rate,
quant_bits=activation_bits,
dtype=self._dtype,
quant_on_weight=False)
def forward(self, input):
quant_input = self._fake_quant_input(input)
return self._layer.forward(quant_input)
class MovingAverageAbsMaxScale(layers.Layer):
def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
r"""

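To illustrate what QuantizedNoweightLayer simulates on its input, here is a hedged NumPy sketch of a moving-average abs-max fake quantize-dequantize. It is a conceptual stand-in, not the Paddle implementation, and all names in it are made up for illustration.

# Conceptual NumPy sketch of fake quant-dequant with a moving-average
# abs-max scale; not the Paddle implementation.
import numpy as np

def fake_quant_dequant(x, state, moving_rate=0.9, quant_bits=8):
    # Track a moving-average abs-max scale, as a stand-in for the
    # statistics updated during training.
    cur_max = float(np.abs(x).max())
    state['scale'] = (moving_rate * state.get('scale', cur_max)
                      + (1.0 - moving_rate) * cur_max)
    scale = state['scale']
    bnt = (1 << (quant_bits - 1)) - 1  # 127 for 8 bits
    # Quantize to integers, then immediately dequantize back to float,
    # so the wrapped layer runs in float but "sees" the quantization error.
    q = np.round(np.clip(x / scale, -1.0, 1.0) * bnt)
    return (q * scale / bnt).astype(x.dtype)

state = {}
x = np.random.randn(4, 8).astype('float32')
quant_input = fake_quant_dequant(x, state)
# quant_input would then be fed to the wrapped no-weight layer (ReLU, Pool2D, ...).
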
@@ -270,6 +270,12 @@ list(REMOVE_ITEM TEST_OPS
LIST(REMOVE_ITEM TEST_OPS test_auto_pruning)
LIST(REMOVE_ITEM TEST_OPS test_filter_pruning)
# only tests on single GPU environment
LIST(REMOVE_ITEM TEST_OPS test_imperative_qat_addquantdequant)
py_test_modules(test_imperative_qat_addquantdequant MODULES test_imperative_qat_addquantdequant ENVS
CUDA_VISIBLE_DEVICES=0)
# fix
if(WIN32)
SET(SINGLE_CARD_TEST_OPS
@@ -305,6 +311,7 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120)
set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_addquantdequant PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120)
if(LINUX AND WITH_MKLDNN)
set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120)

@@ -86,9 +86,9 @@ def StaticLenet(data, num_classes=10):
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
fc4 = fluid.layers.softmax(fc3, use_cudnn=True)
fc3 = fluid.layers.softmax(fc3, use_cudnn=True)
return fc4
return fc3
class ImperativeLenet(fluid.dygraph.Layer):
