@@ -21,6 +21,7 @@ from paddle.fluid.framework import _varbase_creator
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.initializer import Constant
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.nn import functional as F
__all__ = [
    'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D',
@@ -144,7 +145,6 @@ class FakeQuantAbsMax(layers.Layer):
                 quant_on_weight=False):
        super(FakeQuantAbsMax, self).__init__()
        self._quant_bits = quant_bits
        self._dtype = dtype
        self._name = name
        scale_prefix = "{}.scale".format(
            name) if name else 'quant_dequant.scale'
@@ -342,16 +342,17 @@ class QuantizedConv2D(layers.Layer):
        self._groups = getattr(layer, '_groups')
        self._stride = getattr(layer, '_stride')
        self._padding = getattr(layer, '_padding')
        self._padding_mode = getattr(layer, '_padding_mode')
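        # Note: non-'zeros' padding modes are applied in forward() via F.pad,
        # which expects the padding repeated twice per spatial dim in reversed
        # order, hence the precomputed list copied from the wrapped layer below.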
        if self._padding_mode != 'zeros':
            self._reversed_padding_repeated_twice = getattr(
                layer, '_reversed_padding_repeated_twice')
        self._dilation = getattr(layer, '_dilation')
        self._act = getattr(layer, '_act')
        self._use_cudnn = getattr(layer, '_use_cudnn')
        self._dtype = getattr(layer, '_dtype')
        self._l_type = getattr(layer, '_l_type')
        self._data_format = getattr(layer, '_data_format')
        self.weight = getattr(layer, 'weight')
        self.bias = getattr(layer, 'bias')
        # For FakeQuant
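        # Assumption: conv2d filters are laid out as
        # [out_channels, in_channels // groups, kH, kW], so per-channel weight
        # quantization runs along axis 0 (the output channels).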
        self._conv2d_quant_axis = 0
        if weight_quant_layer is not None:
            self._fake_quant_weight = weight_quant_layer()
        else:
@@ -390,52 +391,22 @@ class QuantizedConv2D(layers.Layer):
        weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)
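        # Dynamic-graph fast path (conv2d only): call the C++ conv2d op
        # directly on the fake-quantized input and weight.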
        if in_dygraph_mode() and self._l_type == 'conv2d':
            attrs = ('strides', self._stride, 'paddings', self._padding,
                     'dilations', self._dilation, 'groups', self._groups
                     if self._groups else 1, 'use_cudnn', self._use_cudnn)
            pre_bias = core.ops.conv2d(quant_input, quant_weight, *attrs)
            pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, self.bias,
                                                            1)
            return dygraph_utils._append_activation_in_dygraph(pre_act,
                                                                self._act)
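        # Static-graph path: build the conv2d, bias-add and activation ops
        # through LayerHelper so the quantized tensors are wired into the
        # program.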
        check_variable_and_dtype(quant_input, 'input',
                                 ['float16', 'float32', 'float64'],
                                 'QuantizedConv2D')
        attrs = {
            'strides': self._stride,
            'paddings': self._padding,
            'dilations': self._dilation,
            'groups': self._groups if self._groups else 1,
            'use_cudnn': self._use_cudnn,
            'use_mkldnn': False,
        }
        pre_bias = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        self._helper.append_op(
            type=self._l_type,
            inputs={
                'Input': quant_input,
                'Filter': quant_weight,
            },
            outputs={"Output": pre_bias},
            attrs=attrs)
        if self.bias is not None:
            pre_act = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(
                type='elementwise_add',
                inputs={'X': [pre_bias],
                        'Y': [self.bias]},
                outputs={'Out': [pre_act]},
                attrs={'axis': 1})
        else:
            pre_act = pre_bias
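        # For non-'zeros' padding modes, pad the quantized input explicitly,
        # then run the convolution itself with zero padding.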
        if self._padding_mode != 'zeros':
            quant_input = F.pad(quant_input,
                                self._reversed_padding_repeated_twice,
                                mode=self._padding_mode,
                                data_format=self._data_format)
            self._padding = 0
        return self._helper.append_activation(pre_act, act=self._act)
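        # Functional path: F.conv2d consumes the fake-quantized input and
        # weight and adds the (unquantized) bias internally.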
        return F.conv2d(
            quant_input,
            quant_weight,
            bias=self.bias,
            padding=self._padding,
            stride=self._stride,
            dilation=self._dilation,
            groups=self._groups,
            data_format=self._data_format)
class QuantizedLinear(layers.Layer):
@@ -457,10 +428,9 @@ class QuantizedLinear(layers.Layer):
                 act_quant_layer=None):
        super(QuantizedLinear, self).__init__()
        # For Linear
        self._act = getattr(layer, '_act')
        self._dtype = getattr(layer, '_dtype')
        self.weight = getattr(layer, 'weight')
        self.bias = getattr(layer, 'bias')
        self.name = getattr(layer, 'name')
        # For FakeQuant
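        # Assumption: the linear weight is stored as [in_features, out_features],
        # so per-channel weight quantization runs along axis 1 (output features).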
        self._linear_quant_axis = 1
@@ -503,44 +473,9 @@ class QuantizedLinear(layers.Layer):
        weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)
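        # Dynamic-graph fast path: run matmul directly on the fake-quantized
        # input and weight, then append bias and activation.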
        if in_dygraph_mode():
            pre_bias = _varbase_creator(dtype=input.dtype)
            core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',
                            False, 'transpose_Y', False, "alpha", 1)
            pre_act = dygraph_utils._append_bias_in_dygraph(
                pre_bias, self.bias, axis=len(input.shape) - 1)
            return dygraph_utils._append_activation_in_dygraph(pre_act,
                                                               self._act)
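        # Static-graph path: compose matmul, elementwise_add and activation
        # ops through LayerHelper for the quantized program.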
        check_variable_and_dtype(input, 'input',
                                 ['float16', 'float32', 'float64'],
                                 "QuantizedLinear")
        attrs = {
            "transpose_X": False,
            "transpose_Y": False,
            "alpha": 1,
        }
        inputs = {"X": [quant_input], "Y": [quant_weight]}
        mul_out = self._helper.create_variable_for_type_inference(self._dtype)
        self._helper.append_op(
            type="matmul",
            inputs=inputs,
            outputs={"Out": [mul_out]},
            attrs=attrs)
        if self.bias is not None:
            pre_activation = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(
                type='elementwise_add',
                inputs={'X': [mul_out],
                        'Y': [self.bias]},
                outputs={'Out': [pre_activation]},
                attrs={'axis': len(input.shape) - 1})
        else:
            pre_activation = mul_out
        return self._helper.append_activation(pre_activation, act=self._act)
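        # Functional path: F.linear computes x @ weight + bias on the
        # fake-quantized input and weight.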
        out = F.linear(
            x=quant_input, weight=quant_weight, bias=self.bias, name=self.name)
        return out
class MovingAverageAbsMaxScale(layers.Layer):