Support user-defined activation/weight quantize and preprocess. (#28570)

* support user-defined quant and preprocess
5 years ago · 5050e761b8
parent 11e32baf1e
commit 5050e761b8
3 changed files with 373 additions and 39 deletions
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@ -59,7 +59,11 @@ class ImperativeQuantAware(object):
                 weight_quantize_type='abs_max',
                 activation_quantize_type='moving_average_abs_max',
                 moving_rate=0.9,
-                 quantizable_layer_type=['Conv2D', 'Linear']):
+                 quantizable_layer_type=['Conv2D', 'Linear'],
                 weight_preprocess_layer=None,
                 act_preprocess_layer=None,
                 weight_quantize_layer=None,
                 act_quantize_layer=None):
        """
        The constructor for ImperativeQuantAware.
@ -81,7 +85,28 @@ class ImperativeQuantAware(object):
            quantizable_op_type(list[str]): List the type of layers that will be quantized. 
                Default is ['Conv2D', 'Linear']. The quantizable_op_type in
                QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
-
+            weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess
                weight before quantization. Using this can quickly test if user's
                preprocess method works or not. The input is non-quantized
                weight and function returns processed weight to be quantized.
                If None, the weight will be quantized directly. Default is None.
            act_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess
                activation before quantization. Using this can quickly test if user's
                preprocess method works or not. The input is non-quantized
                activation and function returns processed activation to be quantized.
                If None, the activation will be quantized directly. Default is None.
            weight_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize weight.
                Using this can quickly test if user's quantization method works or not.
                In this layer, user should both define quantization method and
                dequantization method, that is, the function's input is non-quantized
                weight and returns dequantized weight. If None, will use
                quantization op defined by 'weight_quantize_type'. Default is None.
            act_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize activation.
                Using this can quickly test if user's quantization method works or not.
                In this layer, user should both define quantization method and
                dequantization method, that is, the function's input is non-quantized
                activation and returns dequantized activation. If None, will use
                quantization op defined by 'activation_quantize_type'. Default is None.
        Examples:
        .. code-block:: python
@ -118,6 +143,19 @@ class ImperativeQuantAware(object):
        self._activation_bits = activation_bits
        self._moving_rate = moving_rate
        self._weight_pre_layer = weight_preprocess_layer
        self._act_pre_layer = act_preprocess_layer
        self._weight_quant_layer = weight_quantize_layer
        self._act_quant_layer = act_quantize_layer
        t_check = lambda method: method is None or issubclass(method, dygraph.layers.Layer)
        assert t_check(
            self._weight_pre_layer), "weight_preprocess should be nn.Layer"
        assert t_check(self._act_pre_layer), "act_preprocess should be nn.Layer"
        assert t_check(
            self._weight_quant_layer), "weight_quantize should be nn.Layer"
        assert t_check(self._act_quant_layer), "act_quantize should be nn.Layer"
        quant_type = {
            'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'
        }
@ -189,7 +227,9 @@ class ImperativeQuantAware(object):
        quantized_layer = quant_nn.__dict__[quantized_counterpart[index]](
            layer, self._weight_bits, self._activation_bits, self._moving_rate,
-            self._weight_quantize_type, self._activation_quantize_type)
+            self._weight_quantize_type, self._activation_quantize_type,
            self._weight_pre_layer, self._act_pre_layer,
            self._weight_quant_layer, self._act_quant_layer)
        return quantized_layer
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py
@ -332,7 +332,11 @@ class QuantizedConv2D(layers.Layer):
                 activation_bits=8,
                 moving_rate=0.9,
                 weight_quantize_type='abs_max',
-                 activation_quantize_type='abs_max'):
+                 activation_quantize_type='abs_max',
                 weight_pre_layer=None,
                 act_pre_layer=None,
                 weight_quant_layer=None,
                 act_quant_layer=None):
        super(QuantizedConv2D, self).__init__()
        # For Conv2D
        self._groups = getattr(layer, '_groups')
@ -347,6 +351,10 @@ class QuantizedConv2D(layers.Layer):
        self.bias = getattr(layer, 'bias')
        # For FakeQuant
        self._conv2d_quant_axis = 0
        if weight_quant_layer is not None:
            self._fake_quant_weight = weight_quant_layer()
        else:
            self._fake_quant_weight = _get_fake_quant_type(
                weight_quantize_type,
                name=self.weight.name,
@ -356,6 +364,9 @@ class QuantizedConv2D(layers.Layer):
                quant_on_weight=True,
                channel_num=self.weight.shape[self._conv2d_quant_axis],
                quant_axis=self._conv2d_quant_axis)
        if act_quant_layer is not None:
            self._fake_quant_input = act_quant_layer()
        else:
            self._fake_quant_input = _get_fake_quant_type(
                activation_quantize_type,
                name=layer.full_name(),
@ -364,9 +375,20 @@ class QuantizedConv2D(layers.Layer):
                dtype=self._dtype,
                quant_on_weight=False)
        self._act_preprocess = act_pre_layer(
        ) if act_pre_layer is not None else None
        self._weight_preprocess = weight_pre_layer(
        ) if weight_pre_layer is not None else None
    def forward(self, input):
        if self._act_preprocess is not None:
            input = self._act_preprocess(input)
        quant_input = self._fake_quant_input(input)
-        quant_weight = self._fake_quant_weight(self.weight)
+
        weight = self.weight
        if self._weight_preprocess is not None:
            weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)
        if in_dygraph_mode() and self._l_type == 'conv2d':
            attrs = ('strides', self._stride, 'paddings', self._padding,
@ -428,7 +450,11 @@ class QuantizedLinear(layers.Layer):
                 activation_bits=8,
                 moving_rate=0.9,
                 weight_quantize_type='abs_max',
-                 activation_quantize_type='abs_max'):
+                 activation_quantize_type='abs_max',
                 weight_pre_layer=None,
                 act_pre_layer=None,
                 weight_quant_layer=None,
                 act_quant_layer=None):
        super(QuantizedLinear, self).__init__()
        # For Linear
        self._act = getattr(layer, '_act')
@ -437,6 +463,10 @@ class QuantizedLinear(layers.Layer):
        self.bias = getattr(layer, 'bias')
        # For FakeQuant
        self._linear_quant_axis = 1
        if weight_quant_layer is not None:
            self._fake_quant_weight = weight_quant_layer()
        else:
            self._fake_quant_weight = _get_fake_quant_type(
                weight_quantize_type,
                name=self.weight.name,
@ -446,6 +476,10 @@ class QuantizedLinear(layers.Layer):
                quant_on_weight=True,
                channel_num=self.weight.shape[self._linear_quant_axis],
                quant_axis=self._linear_quant_axis)
        if act_quant_layer is not None:
            self._fake_quant_input = act_quant_layer()
        else:
            self._fake_quant_input = _get_fake_quant_type(
                activation_quantize_type,
                name=layer.full_name(),
@ -454,9 +488,21 @@ class QuantizedLinear(layers.Layer):
                dtype=self._dtype,
                quant_on_weight=False)
        self._act_preprocess = act_pre_layer(
        ) if act_pre_layer is not None else None
        self._weight_preprocess = weight_pre_layer(
        ) if weight_pre_layer is not None else None
    def forward(self, input):
        if self._act_preprocess is not None:
            input = self._act_preprocess(input)
        quant_input = self._fake_quant_input(input)
-        quant_weight = self._fake_quant_weight(self.weight)
+
        weight = self.weight
        if self._weight_preprocess is not None:
            weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)
        if in_dygraph_mode():
            pre_bias = _varbase_creator(dtype=input.dtype)
            core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',
--- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py
@ -0,0 +1,248 @@
 #   copyright (c) 2020 paddlepaddle authors. all rights reserved.
 #
 # licensed under the apache license, version 2.0 (the "license");
 # you may not use this file except in compliance with the license.
 # you may obtain a copy of the license at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # unless required by applicable law or agreed to in writing, software
 # distributed under the license is distributed on an "as is" basis,
 # without warranties or conditions of any kind, either express or implied.
 # see the license for the specific language governing permissions and
 # limitations under the license.
 from __future__ import print_function
 import os
 import numpy as np
 import random
 import unittest
 import logging
 import paddle
 import paddle.nn as nn
 from paddle.optimizer import Adam
 from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
 from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
 from paddle.nn import Sequential
 from paddle.fluid.dygraph import Conv2D
 from paddle.nn import Pool2D
 from paddle.fluid.dygraph import Linear
 from paddle.fluid.log_helper import get_logger
 # Export CPU_NUM=1 before any test runs (presumably so Paddle uses a single
 # CPU device -- TODO confirm against the Paddle runtime documentation).
 os.environ["CPU_NUM"] = "1"
 # Module-level logger used by the tests below: INFO level, with each record
 # formatted as "<time>-<level>: <message>".
 _logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
 class PACT(nn.Layer):
    """User-defined preprocess layer that limits its input with a learnable bound.

    The layer owns one float32 parameter ``alpha`` of shape ``[1]``,
    initialised to ``init_value`` and named ``<layer full name>.pact``.
    For a non-negative ``alpha`` the forward pass maps values above
    ``alpha`` to ``alpha``, values below ``-alpha`` to ``-alpha``, and
    leaves everything in between unchanged.
    """

    def __init__(self, init_value=20):
        super(PACT, self).__init__()
        self.alpha = self.create_parameter(
            shape=[1],
            attr=paddle.ParamAttr(
                name=self.full_name() + ".pact",
                initializer=paddle.nn.initializer.Constant(value=init_value)),
            dtype='float32')

    def forward(self, x):
        relu = paddle.nn.functional.relu
        # Amount by which x exceeds +alpha (zero where it does not).
        overshoot = relu(x - self.alpha)
        # Amount by which x falls below -alpha (zero where it does not).
        undershoot = relu(-self.alpha - x)
        return x - overshoot + undershoot
 class CustomQAT(nn.Layer):
    """User-defined quantize layer used by the tests in this file.

    Five float32 parameters of shape ``[1]`` are created, all initialised
    to 1.0: ``u_param``, ``l_param`` and ``alpha_param`` stay trainable,
    while ``upper`` and ``lower`` have ``stop_gradient = True`` and are
    overwritten on every forward call with ``0.9 * old + 0.1 * u_param``
    (respectively ``l_param``).
    """

    def __init__(self):
        super(CustomQAT, self).__init__()
        attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.Constant(value=1.0))
        # (attribute name, frozen?) in creation order.
        specs = (('u_param', False), ('l_param', False),
                 ('alpha_param', False), ('upper', True), ('lower', True))
        for attr_name, frozen in specs:
            param = self.create_parameter(
                shape=[1], attr=attr, dtype='float32')
            setattr(self, attr_name, param)
            if frozen:
                param.stop_gradient = True

    def forward(self, x):
        relu = paddle.nn.functional.relu
        num_bits = 8
        num_levels = 2**num_bits - 1

        # Refresh the running bounds in place.
        paddle.assign(self.upper * 0.9 + self.u_param * 0.1, self.upper)
        paddle.assign(self.lower * 0.9 + self.l_param * 0.1, self.lower)

        # Clip: first raise values below `lower`, then cut values above `upper`.
        raised = x + relu(self.lower - x)
        clipped = raised - relu(raised - self.upper)

        delta = (self.upper - self.lower) / num_levels
        interval = (clipped - self.lower) / delta
        mi = (interval + 0.5) * delta + self.l_param

        # Scaled tanh around `mi`, with slope and scale derived from alpha_param.
        scale = 1 / (1 - self.alpha_param)
        slope = paddle.log(2 / self.alpha_param - 1) * (1 / delta)
        squashed = paddle.tanh((clipped - mi) * slope) * scale

        # Map back using the interval index, step size and l_param offset.
        return ((squashed + 1) / 2 + interval) * delta + self.l_param
 class ImperativeLenet(paddle.nn.Layer):
    """Small LeNet-style network: two conv + max-pool stages, then three Linear layers.

    The first Linear layer expects 400 input features; the last one
    outputs ``num_classes`` values and applies ``classifier_activation``.
    """

    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(ImperativeLenet, self).__init__()
        # Sub-layers are built in the order in which they are applied.
        conv_stages = [
            Conv2D(
                num_channels=1,
                num_filters=6,
                filter_size=3,
                stride=1,
                padding=1),
            Pool2D(
                pool_size=2, pool_type='max', pool_stride=2),
            Conv2D(
                num_channels=6,
                num_filters=16,
                filter_size=5,
                stride=1,
                padding=0),
            Pool2D(
                pool_size=2, pool_type='max', pool_stride=2),
        ]
        self.features = Sequential(*conv_stages)
        dense_stages = [
            Linear(
                input_dim=400, output_dim=120),
            Linear(
                input_dim=120, output_dim=84),
            Linear(
                input_dim=84, output_dim=num_classes,
                act=classifier_activation),
        ]
        self.fc = Sequential(*dense_stages)

    def forward(self, inputs):
        feature_maps = self.features(inputs)
        # Flatten everything after the batch axis before the dense layers.
        flat = paddle.flatten(feature_maps, 1)
        return self.fc(flat)
 class TestUserDefinedActPreprocess(unittest.TestCase):
    """Smoke test for ImperativeQuantAware with a user-defined layer.

    ``setUp`` builds the quantizer (here with ``PACT`` as the activation
    preprocess layer); ``test_quant_aware_training`` quantizes a LeNet,
    runs one training step and one full evaluation pass on MNIST, and logs
    loss/accuracy. No accuracy threshold is asserted: the test passes as
    long as the pipeline runs without raising.
    """

    def setUp(self):
        _logger.info("test act_preprocess")
        self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT)

    def test_quant_aware_training(self):
        imperative_qat = self.imperative_qat
        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed
        lenet = ImperativeLenet()

        # Give every parameter a deterministic value: zeros for biases,
        # N(0, 0.01) drawn from the seeded NumPy generator for the rest.
        fixed_state = {}
        for name, param in lenet.named_parameters():
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                # Draw directly in the target shape; this replaces the
                # np.product alias, which was removed in NumPy 2.0.
                value = np.random.normal(
                    loc=0.0, scale=0.01,
                    size=p_value.shape).astype('float32')
            fixed_state[name] = value
        lenet.set_dict(fixed_state)
        imperative_qat.quantize(lenet)

        def to_tensors(data):
            # Convert one reader batch of (image, label) pairs to tensors.
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)
            return paddle.to_tensor(x_data), paddle.to_tensor(y_data)

        def train(model):
            adam = Adam(learning_rate=0.001, parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader()):
                    img, label = to_tensors(data)
                    out = model(img)
                    acc = paddle.metric.accuracy(out, label, k=1)
                    loss = nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    if batch_id % 50 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))
                        # The condition is already true for batch 0, so the
                        # loop stops after a single training step.
                        break

        def test(model):
            model.eval()
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader()):
                img, label = to_tensors(data)
                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info(
                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
                            batch_id, acc_top1.numpy(), acc_top5.numpy()))

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=512, drop_last=True)
        test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=512)
        train(lenet)
        test(lenet)
 class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess):
    """Runs the inherited test with ``PACT`` as the weight preprocess layer."""

    def setUp(self):
        _logger.info("test weight_preprocess")
        quantizer = ImperativeQuantAware(weight_preprocess_layer=PACT)
        self.imperative_qat = quantizer
 class TestUserDefinedActQuantize(TestUserDefinedActPreprocess):
    """Runs the inherited test with ``CustomQAT`` as the activation quantize layer."""

    def setUp(self):
        _logger.info("test act_quantize")
        quantizer = ImperativeQuantAware(act_quantize_layer=CustomQAT)
        self.imperative_qat = quantizer
 class TestUserDefinedWeightQuantize(TestUserDefinedActPreprocess):
    """Runs the inherited test with ``CustomQAT`` as the weight quantize layer."""

    def setUp(self):
        _logger.info("test weight_quantize")
        quantizer = ImperativeQuantAware(weight_quantize_layer=CustomQAT)
        self.imperative_qat = quantizer
 # Run every test case in this module when the file is executed as a script.
 if __name__ == '__main__':
    unittest.main()