@ -23,6 +23,10 @@ from ...common import dtype as mstype
__all__ = ["MinMaxUpdatePerLayer",
@ -163,6 +167,237 @@ class MinMaxUpdatePerChannel(PrimitiveWithInfer):
return min_type, max_type
class FakeQuantWithMinMaxVars(PrimitiveWithInfer):
Fake-quantize the input by min and max.
num_bits (int): Quantization bitwidth; between 2 and 16, inclusive. Default: 8.
narrow_range (bool): Whether the quantization algorithm uses narrow range or not.
If True, the quantization range is [1, 2^num_bits-1]. Otherwise, the quantization
range is [0, 2^num_bits-1]. Default: False.
- **x** (Tensor) - The input data to be fake-quantized, of type float16 or float32.
- **min** (Tensor) - Value of the min range of the input data x.
- **max** (Tensor) - Value of the max range of the input data x.
- Tensor, the data type and shape of output tensor is the same as input x.
>>> input_tensor = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32)
>>> min_tensor = Tensor(np.array([-6]), mstype.float32)
>>> max_tensor = Tensor(np.array([6]), mstype.float32)
>>> output_tensor = FakeQuantWithMinMaxVars(num_bits=8, narrow_range=False)(
>>> input_tensor, min_tensor, max_tensor)
>>> output_tensor shape: (3, 16, 5, 5) data type: mstype.float32
def __init__(self,
self.num_bits = validator.check_positive_int(num_bits, 'num_bits', self.name)
self.num_bits = validator.check_int_range(self.num_bits, 2, 16, Rel.INC_BOTH, 'num_bits', self.name)
self.narrow_range = validator.check_value_type(
'narrow_range', narrow_range, (bool,), self.name)
def check_broadcast(self, min_shape, input_shape):
    """
    Check that the length of 'min' is compatible with the input shape.

    The check passes when the first dimension of `min_shape` is at most 1, or
    when it equals the total number of elements described by `input_shape`.

    Args:
        min_shape (Union[list, tuple]): Shape of the 'min' tensor; only its first entry is read.
        input_shape (Union[list, tuple]): Shape of the input 'x'.

    Raises:
        ValueError: If the first dimension of `min_shape` is greater than 1 and
            differs from the element count of `input_shape`.
    """
    # Total number of elements in x (product of all of its dimensions).
    element_count = 1
    for dim in input_shape:
        element_count *= dim

    min_len = min_shape[0]
    if min_len <= 1 or min_len == element_count:
        return
    raise ValueError(f"For '{self.name}', the shape of \'min\' cannot broadcast to the shape of \'x\'.")
def infer_shape(self, x_shape, min_shape, max_shape):
    """
    Validate the input shapes and infer the output shape.

    Checks, in order: the rank of x against 1 (Rel.GE), that the shapes of
    min and max are equal, the rank of min against 1 (Rel.EQ), and finally
    that min passes `check_broadcast` against x.

    Returns:
        The shape of x, unchanged.
    """
    prim_name = self.name
    x_rank = len(x_shape)
    validator.check_integer("x rank", x_rank, 1, Rel.GE, prim_name)
    validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, prim_name)
    min_rank = len(min_shape)
    validator.check_integer("min shape", min_rank, 1, Rel.EQ, prim_name)
    # Rank of min is validated above, so indexing min_shape[0] inside is safe.
    self.check_broadcast(min_shape, x_shape)
    return x_shape
def infer_dtype(self, x_type, min_type, max_type):
    """
    Validate the input dtypes and infer the output dtype.

    Each of x, min and max is checked on its own against float16/float32,
    in that order.

    Returns:
        The dtype of x, unchanged.
    """
    valid_types = (mstype.float16, mstype.float32)
    named_types = (('x', x_type), ('min', min_type), ('max', max_type))
    for arg_name, arg_type in named_types:
        validator.check_tensor_type_same({arg_name: arg_type}, valid_types, self.name)
    return x_type
class FakeQuantWithMinMaxVarsGradient(PrimitiveWithInfer):
Performs grad of FakeQuantWithMinMaxVars operation.
num_bits (int): Quantization bitwidth; between 2 and 16, inclusive. Default: 8.
narrow_range (bool): Whether the quantization algorithm uses narrow range or not.
If True, the quantization range is [1, 2^num_bits-1]. Otherwise, the quantization
range is [0, 2^num_bits-1]. Default: False.
- **gradients** (Tensor) - The gradient with respect to the output of FakeQuantWithMinMaxVars; it must have the same shape as x.
- **x** (Tensor) - The input data of the forward FakeQuantWithMinMaxVars operation, of type float16 or float32.
- **min** (Tensor) - Value of the min range of the input data x.
- **max** (Tensor) - Value of the max range of the input data x.
- **backprops_wrt_x** (Tensor) - The gradient of input x, with the same shape and data type as input x.
- **backprops_wrt_min** (Tensor) - The gradient of input min, with the same shape and data type as input min.
- **backprops_wrt_max** (Tensor) - The gradient of input max, with the same shape and data type as input max.
>>> gradients = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32)
>>> input_tensor = Tensor(np.random.rand(3, 16, 5, 5), mstype.float32)
>>> min_tensor = Tensor(np.array([-6]), mstype.float32)
>>> max_tensor = Tensor(np.array([6]), mstype.float32)
>>> x_gradient, min_gradient, max_gradient = FakeQuantWithMinMaxVarsGradient(
>>>     num_bits=8, narrow_range=False)(
>>>     gradients, input_tensor, min_tensor, max_tensor)
>>> x_gradient shape: (3, 16, 5, 5) data type: mstype.float32
>>> min_gradient shape: (1,) data type: mstype.float32
>>> max_gradient shape: (1,) data type: mstype.float32
def __init__(self,
self.num_bits = validator.check_positive_int(num_bits, 'num_bits', self.name)
self.num_bits = validator.check_int_range(self.num_bits, 2, 16, Rel.INC_BOTH, 'num_bits', self.name)
self.narrow_range = validator.check_value_type(
'narrow_range', narrow_range, (bool,), self.name)
def check_broadcast(self, min_shape, input_shape):
    """
    Check that the length of 'min' is compatible with the input shape.

    The check passes when the first dimension of `min_shape` is at most 1, or
    when it equals the total number of elements described by `input_shape`.

    Args:
        min_shape (Union[list, tuple]): Shape of the 'min' tensor; only its first entry is read.
        input_shape (Union[list, tuple]): Shape of the input 'x'.

    Raises:
        ValueError: If the first dimension of `min_shape` is greater than 1 and
            differs from the element count of `input_shape`.
    """
    # Total number of elements in x (product of all of its dimensions).
    element_count = 1
    for dim in input_shape:
        element_count *= dim

    min_len = min_shape[0]
    if min_len <= 1 or min_len == element_count:
        return
    raise ValueError(f"For '{self.name}', the shape of \'min\' cannot broadcast to the shape of \'x\'.")
def infer_shape(self, dout_shape, x_shape, min_shape, max_shape):
    """
    Validate the input shapes and infer the shapes of the three gradients.

    Checks, in order: the rank of x against 1 (Rel.GE), that dout and x have
    equal shapes, that min and max have equal shapes, the rank of min
    against 1 (Rel.EQ), and finally that min passes `check_broadcast`
    against x.

    Returns:
        Tuple of the shapes of x, min and max, unchanged.
    """
    prim_name = self.name
    x_rank = len(x_shape)
    validator.check_integer("x rank", x_rank, 1, Rel.GE, prim_name)
    validator.check("dout shape", dout_shape, "x shape", x_shape, Rel.EQ, prim_name)
    validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, prim_name)
    min_rank = len(min_shape)
    validator.check_integer("min shape", min_rank, 1, Rel.EQ, prim_name)
    # Rank of min is validated above, so indexing min_shape[0] inside is safe.
    self.check_broadcast(min_shape, x_shape)
    grad_shapes = (x_shape, min_shape, max_shape)
    return grad_shapes
def infer_dtype(self, dout_type, x_type, min_type, max_type):
    """
    Validate the input dtypes and infer the dtypes of the three gradients.

    Each of dout, x, min and max is checked on its own against
    float16/float32, in that order.

    Returns:
        Tuple of the dtypes of x, min and max, unchanged.
    """
    valid_types = (mstype.float16, mstype.float32)
    named_types = (
        ('dout', dout_type),
        ('x', x_type),
        ('min', min_type),
        ('max', max_type),
    )
    for arg_name, arg_type in named_types:
        validator.check_tensor_type_same({arg_name: arg_type}, valid_types, self.name)
    grad_types = (x_type, min_type, max_type)
    return grad_types
class FakeQuantWithMinMaxVarsPerChannel(PrimitiveWithInfer):
Fake-quantize the input, which has one of the shapes [d], [b, d] or [b, h, w, d], by per-channel min and max.
num_bits (int): Quantization bitwidth; between 2 and 16, inclusive. Default: 8.
narrow_range (bool): Whether the quantization algorithm uses narrow range or not.
If True, the quantization range is [1, 2^num_bits-1]. Otherwise, the quantization
range is [0, 2^num_bits-1]. Default: False.
- **x** (Tensor) - The input data to be fake-quantized, of type float16 or float32.
- **min** (Tensor) - Value of the min range of the input data x.
- **max** (Tensor) - Value of the max range of the input data x.
- Tensor, the data type and shape of output tensor is the same as input x.
>>> input_tensor = Tensor(np.random.rand(3, 16, 3, 4), mstype.float32)
>>> min_tensor = Tensor(np.array([-6, -1, -2, -3]), mstype.float32)
>>> max_tensor = Tensor(np.array([6, 1, 2, 3]), mstype.float32)
>>> output_tensor = FakeQuantWithMinMaxVarsPerChannel(num_bits=8, narrow_range=False)(
>>> input_tensor, min_tensor, max_tensor)
>>> output_tensor shape: (3, 16, 3, 4) data type: mstype.float32
def __init__(self,
self.num_bits = validator.check_positive_int(num_bits, 'num_bits', self.name)
self.num_bits = validator.check_int_range(self.num_bits, 2, 16, Rel.INC_BOTH, 'num_bits', self.name)
self.narrow_range = validator.check_value_type(
'narrow_range', narrow_range, (bool,), self.name)
def infer_shape(self, x_shape, min_shape, max_shape):
    """
    Validate the input shapes and infer the output shape.

    Checks, in order: the rank of x against 1 (Rel.GE), that the shapes of
    min and max are equal, the rank of min against 1 (Rel.EQ), and that the
    length of min equals the last dimension of x.

    Returns:
        The shape of x, unchanged.
    """
    prim_name = self.name
    x_rank = len(x_shape)
    validator.check_integer("x rank", x_rank, 1, Rel.GE, prim_name)
    validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, prim_name)
    min_rank = len(min_shape)
    validator.check_integer("min shape", min_rank, 1, Rel.EQ, prim_name)
    # One min/max entry is expected per element of the last axis of x.
    min_len = min_shape[0]
    last_dim = x_shape[-1]
    validator.check("min shape", min_len, "x shape", last_dim, Rel.EQ, prim_name)
    return x_shape
def infer_dtype(self, x_type, min_type, max_type):
    """
    Validate the input dtypes and infer the output dtype.

    Each of x, min and max is checked on its own against float16/float32,
    in that order.

    Returns:
        The dtype of x, unchanged.
    """
    valid_types = (mstype.float16, mstype.float32)
    named_types = (('x', x_type), ('min', min_type), ('max', max_type))
    for arg_name, arg_type in named_types:
        validator.check_tensor_type_same({arg_name: arg_type}, valid_types, self.name)
    return x_type
class FakeQuantWithMinMaxVarsPerChannelGradient(PrimitiveWithInfer):
Performs grad of FakeQuantWithMinMaxVarsPerChannel operation.
num_bits (int): Quantization bitwidth; between 2 and 16, inclusive. Default: 8.
narrow_range (bool): Whether the quantization algorithm uses narrow range or not.
If True, the quantization range is [1, 2^num_bits-1]. Otherwise, the quantization
range is [0, 2^num_bits-1]. Default: False.
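- **gradients** (Tensor) - The gradient with respect to the output of FakeQuantWithMinMaxVarsPerChannel; it must have the same shape as x.
- **x** (Tensor) - The input data of the forward FakeQuantWithMinMaxVarsPerChannel operation, of type float16 or float32.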
- **min** (Tensor) - Value of the min range of the input data x.
- **max** (Tensor) - Value of the max range of the input data x.
- **backprops_wrt_x** (Tensor) - The gradient of input x, with the same shape and data type as input x.
- **backprops_wrt_min** (Tensor) - The gradient of input min, with the same shape and data type as input min.
- **backprops_wrt_max** (Tensor) - The gradient of input max, with the same shape and data type as input max.
>>> gradients = Tensor(np.random.rand(3, 16, 3, 4), mstype.float32)
>>> input_tensor = Tensor(np.random.rand(3, 16, 3, 4), mstype.float32)
>>> min_tensor = Tensor(np.array([-6, -1, -2, -3]), mstype.float32)
>>> max_tensor = Tensor(np.array([6, 1, 2, 3]), mstype.float32)
>>> x_gradient, min_gradient, max_gradient = FakeQuantWithMinMaxVarsPerChannelGradient(
>>> num_bits=8, narrow_range=False)(
>>> gradients, input_tensor, min_tensor, max_tensor)
>>> x_gradient shape: (3, 16, 3, 4) data type: mstype.float32
>>> min_gradient shape: (4,) data type: mstype.float32
>>> max_gradient shape: (4,) data type: mstype.float32
def __init__(self,
self.num_bits = validator.check_positive_int(num_bits, 'num_bits', self.name)
self.num_bits = validator.check_int_range(self.num_bits, 2, 16, Rel.INC_BOTH, 'num_bits', self.name)
self.narrow_range = validator.check_value_type(
'narrow_range', narrow_range, (bool,), self.name)
def infer_shape(self, dout_shape, x_shape, min_shape, max_shape):
    """
    Validate the input shapes and infer the shapes of the three gradients.

    Checks, in order: the rank of x against 1 (Rel.GE), that dout and x have
    equal shapes, that min and max have equal shapes, the rank of min
    against 1 (Rel.EQ), and that the length of min equals the last
    dimension of x.

    Returns:
        Tuple of the shapes of x, min and max, unchanged.
    """
    prim_name = self.name
    x_rank = len(x_shape)
    validator.check_integer("x rank", x_rank, 1, Rel.GE, prim_name)
    validator.check("dout shape", dout_shape, "x shape", x_shape, Rel.EQ, prim_name)
    validator.check("min shape", min_shape, "max shape", max_shape, Rel.EQ, prim_name)
    min_rank = len(min_shape)
    validator.check_integer("min shape", min_rank, 1, Rel.EQ, prim_name)
    # One min/max entry is expected per element of the last axis of x.
    min_len = min_shape[0]
    last_dim = x_shape[-1]
    validator.check("min shape", min_len, "x shape", last_dim, Rel.EQ, prim_name)
    grad_shapes = (x_shape, min_shape, max_shape)
    return grad_shapes
def infer_dtype(self, dout_type, x_type, min_type, max_type):
    """
    Validate the input dtypes and infer the dtypes of the three gradients.

    Each of dout, x, min and max is checked on its own against
    float16/float32, in that order.

    Returns:
        Tuple of the dtypes of x, min and max, unchanged.
    """
    valid_types = (mstype.float16, mstype.float32)
    named_types = (
        ('dout', dout_type),
        ('x', x_type),
        ('min', min_type),
        ('max', max_type),
    )
    for arg_name, arg_type in named_types:
        validator.check_tensor_type_same({arg_name: arg_type}, valid_types, self.name)
    grad_types = (x_type, min_type, max_type)
    return grad_types
class FakeQuantPerLayer(PrimitiveWithInfer):
Simulates the quantize and dequantize operations in training time.