|
|
|
@ -46,20 +46,20 @@ def create_quant_config(quant_observer=(nn.FakeQuantWithMinMaxObserver, nn.FakeQ
|
|
|
|
|
Configure the observer type of weights and data flow with quant params.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
quant_observer (Union[Observer, list, tuple]): The observer type to do quantization. The first element represent
|
|
|
|
|
weights and second element represent data flow.
|
|
|
|
|
quant_observer (Union[Observer, list, tuple]): The observer type to do quantization. The first element
|
|
|
|
|
represents weights and second element represents data flow.
|
|
|
|
|
Default: (nn.FakeQuantWithMinMaxObserver, nn.FakeQuantWithMinMaxObserver)
|
|
|
|
|
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized during
|
|
|
|
|
eval. The first element represent weights and second element represent data flow. Default: (0, 0)
|
|
|
|
|
eval. The first element represents weights and second element represents data flow. Default: (0, 0)
|
|
|
|
|
quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantizing weights and activations. The first
|
|
|
|
|
element represent weights and second element represent data flow.
|
|
|
|
|
element represents weights and second element represents data flow.
|
|
|
|
|
Default: (QuantDtype.INT8, QuantDtype.INT8)
|
|
|
|
|
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
|
|
|
|
|
then base on per channel otherwise base on per layer. The first element represent weights
|
|
|
|
|
and second element represent data flow. Default: (False, False)
|
|
|
|
|
then based on per channel, otherwise based on per layer. The first element represents weights
|
|
|
|
|
and second element represents data flow. Default: (False, False)
|
|
|
|
|
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
|
|
|
|
|
base on symmetric otherwise base on asymmetric. The first element represent weights and second
|
|
|
|
|
element represent data flow. Default: (False, False)
|
|
|
|
|
based on symmetric, otherwise based on asymmetric. The first element represents weights and second
|
|
|
|
|
element represents data flow. Default: (False, False)
|
|
|
|
|
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
|
|
|
|
|
The first element represents weights and the second element represents data flow. Default: (False, False)
|
|
|
|
|
|
|
|
|
@ -124,16 +124,16 @@ class QuantizationAwareTraining(Quantizer):
|
|
|
|
|
bn_fold (bool): Flag to use bn fold ops for simulation inference operation. Default: True.
|
|
|
|
|
freeze_bn (int): Number of steps after which BatchNorm OP parameters use the total mean and variance. Default: 1e7.
|
|
|
|
|
quant_delay (Union[int, list, tuple]): Number of steps after which weights and activations are quantized during
|
|
|
|
|
eval. The first element represent weights and second element represent data flow. Default: (0, 0)
|
|
|
|
|
eval. The first element represents weights and second element represents data flow. Default: (0, 0)
|
|
|
|
|
quant_dtype (Union[QuantDtype, list, tuple]): Datatype to use for quantizing weights and activations. The first
|
|
|
|
|
element represent weights and second element represent data flow.
|
|
|
|
|
element represents weights and second element represents data flow.
|
|
|
|
|
Default: (QuantDtype.INT8, QuantDtype.INT8)
|
|
|
|
|
per_channel (Union[bool, list, tuple]): Quantization granularity based on layer or on channel. If `True`
|
|
|
|
|
then base on per channel otherwise base on per layer. The first element represent weights
|
|
|
|
|
and second element represent data flow. Default: (False, False)
|
|
|
|
|
then based on per channel, otherwise based on per layer. The first element represents weights
|
|
|
|
|
and second element represents data flow. Default: (False, False)
|
|
|
|
|
symmetric (Union[bool, list, tuple]): Whether the quantization algorithm is symmetric or not. If `True` then
|
|
|
|
|
base on symmetric otherwise base on asymmetric. The first element represent weights and second
|
|
|
|
|
element represent data flow. Default: (False, False)
|
|
|
|
|
based on symmetric, otherwise based on asymmetric. The first element represents weights and second
|
|
|
|
|
element represents data flow. Default: (False, False)
|
|
|
|
|
narrow_range (Union[bool, list, tuple]): Whether the quantization algorithm uses narrow range or not.
|
|
|
|
|
The first element represents weights and the second element represents data flow. Default: (False, False)
|
|
|
|
|
optimize_option (Union[OptimizeOption, list, tuple]): Specifies the quant algorithm and options, currently only
|
|
|
|
|