@@ -21,6 +21,7 @@ from paddle.fluid.framework import _varbase_creator
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.initializer import Constant
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.nn import functional as F
__all__ = [
    'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D',
@@ -144,7 +145,6 @@ class FakeQuantAbsMax(layers.Layer):
                 quant_on_weight=False):
        super(FakeQuantAbsMax, self).__init__()
        self._quant_bits = quant_bits
        self._dtype = dtype
        self._name = name
        scale_prefix = "{}.scale".format(
            name) if name else 'quant_dequant.scale'
@@ -342,16 +342,17 @@ class QuantizedConv2D(layers.Layer):
        self._groups = getattr(layer, '_groups')
        self._stride = getattr(layer, '_stride')
        self._padding = getattr(layer, '_padding')
        self._padding_mode = getattr(layer, '_padding_mode')
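        # Note: non-'zeros' padding modes are applied in forward() via F.pad,
        # which expects the padding repeated twice per spatial dim in reversed
        # order, hence the precomputed list copied from the wrapped layer below.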
        if self._padding_mode != 'zeros':
            self._reversed_padding_repeated_twice = getattr(
                layer, '_reversed_padding_repeated_twice')
        self._dilation = getattr(layer, '_dilation')
        self._act = getattr(layer, '_act')
        self._use_cudnn = getattr(layer, '_use_cudnn')
        self._dtype = getattr(layer, '_dtype')
        self._l_type = getattr(layer, '_l_type')
        self._data_format = getattr(layer, '_data_format')
        self.weight = getattr(layer, 'weight')
        self.bias = getattr(layer, 'bias')
        # For FakeQuant
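        # Assumption: conv2d filters are laid out as
        # [out_channels, in_channels // groups, kH, kW], so per-channel weight
        # quantization runs along axis 0 (the output channels).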
        self._conv2d_quant_axis = 0
        if weight_quant_layer is not None:
            self._fake_quant_weight = weight_quant_layer()
        else:
@@ -390,52 +391,22 @@ class QuantizedConv2D(layers.Layer):
        weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)
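        # Dynamic-graph fast path (conv2d only): call the C++ conv2d op
        # directly on the fake-quantized input and weight.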
        if in_dygraph_mode() and self._l_type == 'conv2d':
            attrs = ('strides', self._stride, 'paddings', self._padding,
                     'dilations', self._dilation, 'groups', self._groups
                     if self._groups else 1, 'use_cudnn', self._use_cudnn)
            pre_bias = core.ops.conv2d(quant_input, quant_weight, *attrs)
            pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, self.bias,
                                                            1)
            return dygraph_utils._append_activation_in_dygraph(pre_act,
                                                                self._act)
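        # Static-graph path: build the conv2d, bias-add and activation ops
        # through LayerHelper so the quantized tensors are wired into the
        # program.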
        check_variable_and_dtype(quant_input, 'input',
                                 ['float16', 'float32', 'float64'],
                                 'QuantizedConv2D')
        attrs = {
            'strides': self._stride,
            'paddings': self._padding,
            'dilations': self._dilation,
            'groups': self._groups if self._groups else 1,
            'use_cudnn': self._use_cudnn,
            'use_mkldnn': False,
        }
        pre_bias = self._helper.create_variable_for_type_inference(
            dtype=self._dtype)
        self._helper.append_op(
            type=self._l_type,
            inputs={
                'Input': quant_input,
                'Filter': quant_weight,
            },
            outputs={"Output": pre_bias},
            attrs=attrs)
        if self.bias is not None:
            pre_act = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(
                type='elementwise_add',
                inputs={'X': [pre_bias],
                        'Y': [self.bias]},
                outputs={'Out': [pre_act]},
                attrs={'axis': 1})
        else:
            pre_act = pre_bias
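        # For non-'zeros' padding modes, pad the quantized input explicitly,
        # then run the convolution itself with zero padding.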
        if self._padding_mode != 'zeros':
            quant_input = F.pad(quant_input,
                                self._reversed_padding_repeated_twice,
                                mode=self._padding_mode,
                                data_format=self._data_format)
            self._padding = 0
        return self._helper.append_activation(pre_act, act=self._act)
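        # Functional path: F.conv2d consumes the fake-quantized input and
        # weight and adds the (unquantized) bias internally.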
        return F.conv2d(
            quant_input,
            quant_weight,
            bias=self.bias,
            padding=self._padding,
            stride=self._stride,
            dilation=self._dilation,
            groups=self._groups,
            data_format=self._data_format)
class QuantizedLinear(layers.Layer):
@@ -457,10 +428,9 @@ class QuantizedLinear(layers.Layer):
                 act_quant_layer=None):
        super(QuantizedLinear, self).__init__()
        # For Linear
        self._act = getattr(layer, '_act')
        self._dtype = getattr(layer, '_dtype')
        self.weight = getattr(layer, 'weight')
        self.bias = getattr(layer, 'bias')
        self.name = getattr(layer, 'name')
        # For FakeQuant
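        # Assumption: the linear weight is stored as [in_features, out_features],
        # so per-channel weight quantization runs along axis 1 (output features).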
        self._linear_quant_axis = 1
@@ -503,44 +473,9 @@ class QuantizedLinear(layers.Layer):
        weight = self._weight_preprocess(self.weight)
        quant_weight = self._fake_quant_weight(weight)
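        # Dynamic-graph fast path: run matmul directly on the fake-quantized
        # input and weight, then append bias and activation.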
        if in_dygraph_mode():
            pre_bias = _varbase_creator(dtype=input.dtype)
            core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',
                            False, 'transpose_Y', False, "alpha", 1)
            pre_act = dygraph_utils._append_bias_in_dygraph(
                pre_bias, self.bias, axis=len(input.shape) - 1)
            return dygraph_utils._append_activation_in_dygraph(pre_act,
                                                               self._act)
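        # Static-graph path: compose matmul, elementwise_add and activation
        # ops through LayerHelper for the quantized program.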
        check_variable_and_dtype(input, 'input',
                                 ['float16', 'float32', 'float64'],
                                 "QuantizedLinear")
        attrs = {
            "transpose_X": False,
            "transpose_Y": False,
            "alpha": 1,
        }
        inputs = {"X": [quant_input], "Y": [quant_weight]}
        mul_out = self._helper.create_variable_for_type_inference(self._dtype)
        self._helper.append_op(
            type="matmul",
            inputs=inputs,
            outputs={"Out": [mul_out]},
            attrs=attrs)
        if self.bias is not None:
            pre_activation = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(
                type='elementwise_add',
                inputs={'X': [mul_out],
                        'Y': [self.bias]},
                outputs={'Out': [pre_activation]},
                attrs={'axis': len(input.shape) - 1})
        else:
            pre_activation = mul_out
        return self._helper.append_activation(pre_activation, act=self._act)
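        # Functional path: F.linear computes x @ weight + bias on the
        # fake-quantized input and weight.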
        out = F.linear(
            x=quant_input, weight=quant_weight, bias=self.bias, name=self.name)
        return out
class MovingAverageAbsMaxScale(layers.Layer):