# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections

from .... import core
from ....framework import IrGraph
from ....framework import Program
from ....framework import Variable
from ....initializer import Constant
from .... import unique_name

__all__ = ['QuantizationTransformPass']


class QuantizationTransformPass(object):
    def __init__(self,
                 scope=None,
                 program_exe=None,
                 weight_bits=8,
                 activation_bits=8,
                 activation_quantize_type='abs_max',
                 weight_quantize_type='abs_max',
                 window_size=10000):
"""
|
|
Convert and rewrite the IrGraph according to weight and
|
|
activation quantization type.
|
|
Args:
|
|
weight_bits (int): quantization bit number for weights,
|
|
the bias is not quantized.
|
|
activation_bits (int): quantization bit number for activation.
|
|
activation_quantize_type (str): quantization type for activation,
|
|
now support 'abs_max', 'range_abs_max'. If use 'abs_max' mode,
|
|
the quantization scale will be calculated dynamically each step
|
|
in both training and testing period. If use 'range_abs_max',
|
|
a static quantization scale will be calculated during training
|
|
and used in inference.
|
|
weight_quantize_type (str): quantization type for weights,
|
|
support 'abs_max'. The 'range_abs_max' usually is not used for
|
|
weight, since weights are fixed once the model is well trained.
|
|
window_size (int): the window size for 'range_abs_max' quantization.
|
|
Examples:
|
|
.. code-block:: python
|
|
# The original graph will be rewrite.
|
|
import paddle.fluid as fluid
|
|
from paddle.fluid.contrib.slim.quantization \
|
|
import QuantizationTransformPass
|
|
from paddle.fluid.contrib.slim.graph import IrGraph
|
|
from paddle.fluid import core
|
|
|
|
graph = IrGraph(core.Graph(program.desc), for_test=False)
|
|
exe = fluid.Executor(fluid.CPUPlace())
|
|
transform_pass = QuantizationTransformPass(fluid.global_scope(),
|
|
exe)
|
|
transform_pass.apply(graph)
|
|
"""
        self._scope = scope
        self._program_exe = program_exe
        self._weight_bits = weight_bits
        self._activation_bits = activation_bits

        quant_type = ['abs_max', 'range_abs_max']
        if activation_quantize_type not in quant_type:
            raise ValueError(
                "Unknown activation_quantize_type: '%s'. It can only be "
                "'abs_max' or 'range_abs_max'." % str(activation_quantize_type))
        if weight_quantize_type not in quant_type:
            raise ValueError(
                "Unknown weight_quantize_type: '%s'. It can only be "
                "'abs_max' or 'range_abs_max'." % str(weight_quantize_type))

        self._activation_quantize_type = activation_quantize_type
        self._weight_quantize_type = weight_quantize_type
        self._window_size = window_size

        self._need_initialized = collections.OrderedDict()
        self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
        self._quantizable_grad_ops = [
            '%s_grad' % (op) for op in self._quantizable_ops
        ]
        self._fake_quant_op_types = [
            'fake_quantize_abs_max', 'fake_quantize_range_abs_max'
        ]
        self._fake_dequant_op_types = ['fake_dequantize_max_abs']
        self._is_test = None
        self._global_step = None

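    # apply() rewrites the graph in place: every input of a quantizable op is
    # routed through a fake-quantize / fake-dequantize pair, so downstream
    # computation sees values carrying quantization error while the graph
    # still runs in floating point.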
    def apply(self, graph):
        assert isinstance(graph,
                          IrGraph), 'graph must be an instance of IrGraph.'
        self._need_initialized.clear()
        self._is_test = graph.is_test()
        # Maps each original variable name to its dequantized version.
        dequantized_vars = collections.OrderedDict()
        params = [p.name() for p in graph.all_parameters()]

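        # For one op, route each input variable through a quant -> dequant
        # pair, inserting the ops on first sight and caching the result in
        # dequantized_vars for reuse by later ops.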
        def _transform_forward(graph, op):
            for var_node in op.inputs:
                if var_node.name() in dequantized_vars:
                    dequant_var_node = dequantized_vars[var_node.name()]
                else:
                    quant_bits = self._weight_bits if var_node.name() in params \
                        else self._activation_bits
                    quant_type = self._weight_quantize_type if var_node.name() \
                        in params else self._activation_quantize_type
                    quant_var_node, scale_var_node = self._insert_quant_op(
                        graph, var_node, quant_bits, quant_type)
                    dequant_var_node = self._insert_dequant_op(
                        graph, quant_var_node, scale_var_node, quant_bits)
                    dequantized_vars[var_node.name()] = dequant_var_node
                graph.update_input_link(var_node, dequant_var_node, op)

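        # The backward graph reuses forward variables, so gradient ops are
        # relinked to the already-dequantized versions of their inputs
        # instead of quantizing them again.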
        def _transform_backward(graph, op):
            no_dequantized_input_vars = True
            for var_node in op.inputs:
                if var_node.name() in dequantized_vars:
                    dequant_var_node = dequantized_vars[var_node.name()]
                    graph.update_input_link(var_node, dequant_var_node, op)
                    no_dequantized_input_vars = False
            if no_dequantized_input_vars:
                raise ValueError("There are no dequantized inputs for op %s." %
                                 (op.name()))

        if not self._is_test:
            self._create_global_step(graph)
        ops = graph.all_ops()
        # _transform_forward and _transform_backward must run in two separate
        # loops: every forward op is transformed first, so that all
        # dequantized variables exist before the backward ops are relinked.
        # The loop for transforming the forward graph:
        for op in ops:
            if op.name() in self._quantizable_ops:
                _transform_forward(graph, op)
        # The loop for renaming the inputs of backward ops:
        for op in ops:
            if op.name() in self._quantizable_grad_ops:
                _transform_backward(graph, op)

        if len(self._need_initialized) > 0:
            assert self._scope is not None, \
                'The scope cannot be None when activation_quantize_type is range_abs_max.'
            assert self._program_exe is not None, \
                'The program_exe cannot be None when activation_quantize_type is range_abs_max.'
            init_program = Program()
            for var_desc, initializer in self._need_initialized.items():
                var = Variable(init_program.global_block())
                var._set_desc(var_desc)
                initializer(var, init_program.global_block())
            self._program_exe.run(program=init_program, scope=self._scope)

        return graph

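    # 'range_abs_max' records one scale value per training step, so it needs
    # a step counter: a single '@STEP_COUNTER@' variable, incremented once
    # per iteration, is shared by all range_abs_max quantize ops as 'Iter'.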
    def _create_global_step(self, graph):
        if self._weight_quantize_type == 'range_abs_max' or \
                self._activation_quantize_type == 'range_abs_max':
            counter_name = '@STEP_COUNTER@'
            for node in graph.all_vars():
                if node.name() == counter_name:
                    self._global_step = node
            if self._global_step is None:
                global_step_in = graph.create_param_node(
                    name=counter_name,
                    var_type=core.VarDesc.VarType.LOD_TENSOR,
                    shape=[1],
                    var_dtype=core.VarDesc.VarType.INT64)
                self._need_initialized[global_step_in.var()] = \
                    Constant(value=0, force_cpu=True)
                global_step_out = graph.create_var_node_from_desc(
                    global_step_in.var())
                increment_op = graph.create_op_node(
                    op_type='increment',
                    attrs={'step': 1.0},
                    inputs={'X': global_step_in},
                    outputs={'Out': global_step_out})
                graph.link_to(global_step_in, increment_op)
                graph.link_to(increment_op, global_step_out)
                self._global_step = global_step_out

    def _insert_quant_op(self, graph, var_node, quant_bits, quant_type):
        """
        Insert fake_quantize_op in the graph.
        """
        if quant_type == 'abs_max':
            return self._insert_quant_abs_max_op(graph, var_node, quant_bits)
        elif quant_type == 'range_abs_max':
            return self._insert_quant_range_abs_max_op(graph, var_node,
                                                       quant_bits)

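    # Roughly, fake_quantize_abs_max computes scale = max(|X|) and
    # Out = round(X / scale * ((1 << (bit_length - 1)) - 1)): values are
    # snapped to the signed integer grid but kept in floating point.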
    def _insert_quant_abs_max_op(self, graph, var_node, quant_bits):
        """
        Insert fake_quantize_abs_max op in the graph.
        """
        assert var_node.is_var(), '{} is not a var'.format(var_node.name())

        quant_var_node = graph.create_var_node(
            name=self._quantized_var_name(var_node.name()),
            var_type=var_node.var().type(),
            shape=var_node.var().shape(),
            var_dtype=var_node.var().dtype())
        scale_var_node = graph.create_var_node(
            name=self._quantized_scale_name(var_node.name()),
            var_type=var_node.var().type(),
            shape=var_node.var().shape(),
            var_dtype=var_node.var().dtype())
        quant_op_node = graph.create_op_node(
            op_type='fake_quantize_abs_max',
            attrs={'bit_length': quant_bits},
            inputs={'X': var_node},
            outputs={'Out': quant_var_node,
                     'OutScale': scale_var_node})
        graph.link_to(var_node, quant_op_node)
        graph.link_to(quant_op_node, quant_var_node)
        graph.link_to(quant_op_node, scale_var_node)
        return quant_var_node, scale_var_node

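    # The range_abs_max variant keeps its scale in a persistable parameter:
    # during training, each step's abs-max is recorded into a rotating window
    # of the last `window_size` scales, from which the quantization scale is
    # maintained; at test time the stored scale is simply read back.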
    def _insert_quant_range_abs_max_op(self, graph, var_node, quant_bits):
        """
        Insert fake_quantize_range_abs_max op in the graph.
        """
        assert var_node.is_var(), '{} is not a var'.format(var_node.name())

        quant_var_node = graph.create_var_node(
            name=self._quantized_var_name(var_node.name()),
            var_type=var_node.var().type(),
            shape=var_node.var().shape(),
            var_dtype=var_node.var().dtype())

        scale_in_node = graph.create_param_node(
            name=self._quantized_scale_name(var_node.name()),
            var_type=core.VarDesc.VarType.LOD_TENSOR,
            shape=[1],
            var_dtype=var_node.var().dtype())
        self._need_initialized[scale_in_node.var()] = Constant(value=0.001)

        scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())
        inputs = {'X': var_node, 'InScale': scale_in_node}
        outputs = {'Out': quant_var_node, 'OutScale': scale_out_node}

        if not self._is_test:
            # The name of scales_node may be 'scales_0', 'scales_1', etc.
            scales_node = graph.create_param_node(
                name=unique_name.generate('scales'),
                var_type=core.VarDesc.VarType.LOD_TENSOR,
                shape=[self._window_size],
                var_dtype=var_node.var().dtype())
            self._need_initialized[scales_node.var()] = Constant(value=0)
            inputs['Iter'] = self._global_step
            outputs['OutScales'] = scales_node
        attrs = {
            'window_size': self._window_size,
            'bit_length': quant_bits,
            'is_test': self._is_test
        }
        quant_op_node = graph.create_op_node(
            op_type='fake_quantize_range_abs_max',
            attrs=attrs,
            inputs=inputs,
            outputs=outputs)

        graph.link_to(var_node, quant_op_node)
        graph.link_to(scale_in_node, quant_op_node)
        graph.link_to(quant_op_node, quant_var_node)
        graph.link_to(quant_op_node, scale_out_node)

        if not self._is_test:
            graph.link_to(self._global_step, quant_op_node)
            graph.link_to(quant_op_node, scales_node)

        return quant_var_node, scale_out_node

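    # fake_dequantize_max_abs maps quantized values back to floats, roughly
    # Out = X * Scale / max_range with max_range = (1 << (quant_bits - 1)) - 1,
    # i.e. the inverse of the quantize step above.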
    def _insert_dequant_op(self, graph, var_node, scale_var_node, quant_bits):
        """
        Insert fake_dequantize_op in the graph.
        """
        assert var_node.is_var(), '{} is not a var'.format(var_node.name())

        dequant_var_node = graph.create_var_node(
            name=self._dequantized_var_name(var_node.name()),
            var_type=var_node.var().type(),
            shape=var_node.var().shape(),
            var_dtype=var_node.var().dtype())
        max_range = (1 << (quant_bits - 1)) - 1
        dequant_op_node = graph.create_op_node(
            op_type='fake_dequantize_max_abs',
            attrs={'max_range': float(max_range)},
            inputs={'X': var_node,
                    'Scale': scale_var_node},
            outputs={'Out': dequant_var_node})
        graph.link_to(var_node, dequant_op_node)
        graph.link_to(scale_var_node, dequant_op_node)
        graph.link_to(dequant_op_node, dequant_var_node)
        return dequant_var_node

    def _quantized_var_name(self, var_name):
        """
        Return the quantized variable name for the input `var_name`.
        """
        return "%s.quantized" % (var_name)

    def _dequantized_var_name(self, var_name):
        """
        Return the dequantized variable name for the input `var_name`.
        """
        return "%s.dequantized" % (var_name)

    def _quantized_scale_name(self, var_name):
        """
        Return the scale name for the quantized variable `var_name`.
        """
        return "%s.scale" % (var_name)