@@ -26,8 +26,6 @@ __all__ = [
     'AddQuantDequantPass'
 ]
 
-_quantizable_op_list = ['conv2d', 'depthwise_conv2d', 'mul']
-
 _fake_quant_op_list = [
     'fake_quantize_abs_max', 'fake_quantize_range_abs_max',
     'fake_quantize_moving_average_abs_max', 'fake_channel_wise_quantize_abs_max'
@@ -65,17 +63,18 @@ class QuantizationTransformPass(object):
                  weight_quantize_type='abs_max',
                  window_size=10000,
                  moving_rate=0.9,
-                 skip_pattern='skip_quant'):
+                 skip_pattern='skip_quant',
+                 quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
         """
         Convert and rewrite the IrGraph according to weight and
         activation quantization type.
 
         Args:
             scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
-            type, this pass will create some new parameters. The scope is used to
-            initialize these new parameters.
+                type, this pass will create some new parameters. The scope is used to
+                initialize these new parameters.
             place(fluid.CPUPlace|fluid.CUDAPlace): place is used to initialize new
-            parameters described above.
+                parameters described above.
             weight_bits (int): quantization bit number for weights,
                 the bias is not quantized.
             activation_bits (int): quantization bit number for activation.
@@ -93,6 +92,8 @@ class QuantizationTransformPass(object):
             skip_pattern(str): The user-defined quantization skip pattern, which
                 will be presented in the name scope of an op. When the skip pattern is
                 detected in an op's name scope, the corresponding op will not be quantized.
+            quantizable_op_type(list[str]): List the type of ops that will be quantized.
+                Default is ["conv2d", "depthwise_conv2d", "mul"].
 
         Examples:
         .. code-block:: python
@@ -119,7 +120,8 @@ class QuantizationTransformPass(object):
             'abs_max', 'channel_wise_abs_max', 'range_abs_max',
             'moving_average_abs_max'
         ]
-        assert activation_quantize_type != 'channel_wise_abs_max', "The activation quantization type does not support 'channel_wise_abs_max'."
+        assert activation_quantize_type != 'channel_wise_abs_max', \
+            "The activation quantization type does not support 'channel_wise_abs_max'."
         if activation_quantize_type not in quant_type:
             raise ValueError(
                 "Unknown activation_quantize_type : '%s'. It can only be "
@@ -136,7 +138,11 @@ class QuantizationTransformPass(object):
         self._window_size = window_size
         self._moving_rate = moving_rate
 
-        self._quantizable_ops = _quantizable_op_list
+        self._quantizable_ops = quantizable_op_type
+        supported_quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
+        for op in self._quantizable_ops:
+            assert op in supported_quantizable_ops, \
+                op + " is not supported for quantization."
         self._conv_ops = ['conv2d', 'depthwise_conv2d']
         self._quantizable_grad_ops = [
             '%s_grad' % (op) for op in self._quantizable_ops
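
Taken together, these hunks route QuantizationTransformPass through the new quantizable_op_type argument and validate it against the supported op set, instead of reading the module-level _quantizable_op_list. A minimal usage sketch, not part of this diff (it assumes an already-built fluid.Program named main_program; the class and import paths are those of the paddle.fluid.contrib.slim package):

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass

# main_program is assumed to be an already-built fluid.Program.
graph = IrGraph(core.Graph(main_program.desc), for_test=False)
place = fluid.CPUPlace()
scope = fluid.global_scope()

# Only the op types listed here are wrapped with fake quant/dequant ops;
# anything outside ['conv2d', 'depthwise_conv2d', 'mul'] now fails the assert.
transform_pass = QuantizationTransformPass(
    scope=scope,
    place=place,
    activation_quantize_type='moving_average_abs_max',
    weight_quantize_type='abs_max',
    quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
transform_pass.apply(graph)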
@@ -595,9 +601,11 @@ class QuantizationFreezePass(object):
         place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the weight tensors.
         weight_bits (int): quantization bit number for weights.
         activation_bits (int): quantization bit number for activation.
-        weight_quantize_type (str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'.
-        The 'range_abs_max' usually is not used for weight, since weights are fixed once the
-        model is well trained.
+        weight_quantize_type (str): quantization type for weights, support 'abs_max' and
+            'channel_wise_abs_max'. The 'range_abs_max' usually is not used for weight,
+            since weights are fixed once the model is well trained.
+        quantizable_op_type(list[str]): List the type of ops that will be quantized.
+            Default is ["conv2d", "depthwise_conv2d", "mul"].
     """
 
     def __init__(self,
@@ -605,7 +613,8 @@ class QuantizationFreezePass(object):
                  place,
                  weight_bits=8,
                  activation_bits=8,
-                 weight_quantize_type='abs_max'):
+                 weight_quantize_type='abs_max',
+                 quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
         assert scope is not None, \
             'The scope cannot be set None.'
         assert place is not None, \
@@ -615,7 +624,11 @@ class QuantizationFreezePass(object):
         self._weight_bits = weight_bits
         self._activation_bits = activation_bits
         self._weight_quantize_type = weight_quantize_type
-        self._quantizable_ops = _quantizable_op_list
+        self._quantizable_ops = quantizable_op_type
+        supported_quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
+        for op in self._quantizable_ops:
+            assert op in supported_quantizable_ops, \
+                op + " is not supported for quantization."
         self._conv_ops = ['conv2d', 'depthwise_conv2d']
         self._fake_quant_op_names = _fake_quant_op_list
         self._fake_dequant_op_names = _fake_dequant_op_list
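
QuantizationFreezePass picks up the same argument and validation loop. A sketch of the freeze step under the same hypothetical setup (test_graph is assumed: an IrGraph built with for_test=True after training with the transform pass applied):

from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass

# Fold the fake quant ops into real int8 weights on the inference graph;
# the op list must stay within the supported set checked in __init__.
freeze_pass = QuantizationFreezePass(
    scope=scope,
    place=place,
    weight_bits=8,
    activation_bits=8,
    weight_quantize_type='abs_max',
    quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
freeze_pass.apply(test_graph)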
@@ -888,17 +901,26 @@ class ConvertToInt8Pass(object):
     Args:
         scope(fluid.Scope): scope is used to get the weight tensor values.
         place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the
-        8bits weight tensors.
+            8bits weight tensors.
+        quantizable_op_type(list[str]): List the type of ops that will be quantized.
+            Default is ["conv2d", "depthwise_conv2d", "mul"].
     """
 
-    def __init__(self, scope, place):
+    def __init__(self,
+                 scope,
+                 place,
+                 quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
         assert scope is not None, \
             'The scope cannot be set None.'
         assert place is not None, \
             'The place cannot be set None.'
         self._scope = scope
         self._place = place
-        self._quantizable_ops = _quantizable_op_list
+        self._quantizable_ops = quantizable_op_type
+        supported_quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
+        for op in self._quantizable_ops:
+            assert op in supported_quantizable_ops, \
+                op + " is not supported for quantization."
 
     def apply(self, graph):
         """
@@ -1166,7 +1188,8 @@ class AddQuantDequantPass(object):
                  place=None,
                  moving_rate=0.9,
                  quant_bits=8,
-                 skip_pattern='skip_quant'):
+                 skip_pattern='skip_quant',
+                 quantizable_op_type=["elementwise_add", "pool2d"]):
         """
         This pass is used to add quant_dequant op for some ops, such as the
         'elementwise_add' and 'pool2d' op.
@@ -1176,9 +1199,16 @@ class AddQuantDequantPass(object):
         self._moving_rate = moving_rate
         self._quant_bits = quant_bits
         self._is_test = None
-        self._target_ops = ["elementwise_add", "pool2d"]
-        self._target_grad_ops = ['%s_grad' % (op) for op in self._target_ops]
         self._skip_pattern = skip_pattern
+        self._quantizable_op_type = quantizable_op_type
+        self._quantizable_grad_op_type = [
+            '%s_grad' % (op) for op in self._quantizable_op_type
+        ]
+
+        supported_quantizable_op_type = ["elementwise_add", "pool2d"]
+        for op_type in quantizable_op_type:
+            assert op_type in supported_quantizable_op_type, \
+                op_type + " is not supported for quantization."
 
     def apply(self, graph):
         """
@@ -1194,7 +1224,7 @@ class AddQuantDequantPass(object):
         ops = graph.all_op_nodes()
 
         for op_node in ops:
-            if op_node.name() in self._target_ops:
+            if op_node.name() in self._quantizable_op_type:
                 if isinstance(self._skip_pattern, str) and \
                        op_node.op().has_attr("op_namescope") and \
                        op_node.op().attr("op_namescope").find(self._skip_pattern) != -1:
@@ -1221,7 +1251,7 @@ class AddQuantDequantPass(object):
                     graph.update_input_link(in_node, quant_var_node, op_node)
 
         for op_node in ops:
-            if op_node.name() in self._target_grad_ops:
+            if op_node.name() in self._quantizable_grad_op_type:
                 for input_name in op_node.input_arg_names():
                     if input_name in dequantized_vars_map:
                         in_node = graph._find_node_by_name(op_node.inputs,
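
For AddQuantDequantPass, the configurable _quantizable_op_type/_quantizable_grad_op_type pair replaces the hard-coded _target_ops/_target_grad_ops, and apply() now matches op nodes against the configured lists. A sketch under the same assumptions as above:

from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass

# Insert fake quant_dequant ops around the listed op types; only
# 'elementwise_add' and 'pool2d' pass the new validation in __init__.
add_quant_dequant_pass = AddQuantDequantPass(
    scope=scope,
    place=place,
    moving_rate=0.9,
    quant_bits=8,
    quantizable_op_type=['elementwise_add', 'pool2d'])
add_quant_dequant_pass.apply(graph)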