@@ -68,6 +68,23 @@ struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
 
 template struct ClipAndFakeQuantFunctor<platform::CPUDeviceContext, float>;
 
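+// Quantize-dequantize in one pass: clip the input to [-s, s], quantize to
+// bin_cnt integer levels, then map back to floating point, so the output
+// carries only the quantization rounding error.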
+template <typename T>
+struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext, T> {
+  void operator()(const platform::CPUDeviceContext& ctx,
+                  const framework::Tensor& in, const framework::Tensor& scale,
+                  const int bin_cnt, framework::Tensor* out) {
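+    // s: per-tensor scale read from the single-element scale tensor; bin_cnt
+    // plays the role of range = 2^{bit_length - 1} - 1 in the op's formulas.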
+    T s = scale.data<T>()[0];
+    platform::Transform<platform::CPUDeviceContext> trans;
+    trans(ctx, in.data<T>(), in.data<T>() + in.numel(),
+          out->mutable_data<T>(ctx.GetPlace()), ClipFunctor<T>(-s, s));
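+    // Quantize the clipped values and immediately dequantize:
+    // out = (s / bin_cnt) * round(out * bin_cnt / s).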
+    auto out_e = framework::EigenVector<T>::Flatten(*out);
+    out_e.device(*ctx.eigen_device()) =
+        (s / bin_cnt) * (bin_cnt / s * out_e).round();
+  }
+};
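+// Explicit instantiation of the CPU functor for float.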
+template struct ClipAndFakeQuantDequantFunctor<platform::CPUDeviceContext,
+                                               float>;
+
 template <typename T>
 struct ChannelClipAndFakeQuantFunctor<platform::CPUDeviceContext, T> {
   void operator()(const platform::CPUDeviceContext& ctx,
@@ -324,24 +341,26 @@ $$Out = round(X/scale * range)$$
   }
 };
 
-class FakeQuantizeMovingAverageAbsMaxOp : public framework::OperatorWithKernel {
+class FakeQuantOrWithDequantMovingAverageAbsMaxOp
+    : public framework::OperatorWithKernel {
  public:
-  FakeQuantizeMovingAverageAbsMaxOp(const std::string& type,
-                                    const framework::VariableNameMap& inputs,
-                                    const framework::VariableNameMap& outputs,
-                                    const framework::AttributeMap& attrs)
+  FakeQuantOrWithDequantMovingAverageAbsMaxOp(
+      const std::string& type, const framework::VariableNameMap& inputs,
+      const framework::VariableNameMap& outputs,
+      const framework::AttributeMap& attrs)
       : OperatorWithKernel(type, inputs, outputs, attrs) {}
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(
-        ctx->HasInput("X"),
-        "Input(X) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasOutput("Out"),
-        "Output(Out) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("OutScale"),
-                   "Output(OutScale) of FakeQuantizeMovingAverageAbsMaxOp "
-                   "should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of FakeQuantOrWithDequantMovingAverageAbsMaxOp "
+                   "should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of FakeQuantOrWithDequantMovingAverageAbsMaxOp "
+                   "should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("OutScale"),
+        "Output(OutScale) of FakeQuantOrWithDequantMovingAverageAbsMaxOp "
+        "should not be null");
     if (ctx->HasOutput("OutState")) {
       ctx->SetOutputDim("OutState", {1});
     }
@@ -361,7 +380,7 @@ class FakeQuantizeMovingAverageAbsMaxOp : public framework::OperatorWithKernel {
   }
 };
 
-class FakeQuantizeMovingAverageAbsMaxOpMaker
+class FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker
     : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
@@ -386,12 +405,19 @@ class FakeQuantizeMovingAverageAbsMaxOpMaker
                   "for training. Some layers may run faster when this is true.")
         .SetDefault(false);
     AddComment(R"DOC(
-FakeQuantize operator is used in static quantization.
+This is a base op, shared by FakeQuantMovingAverageAbsMaxOp and FakeQuantDequantMovingAverageAbsMaxOp.
+The FakeQuantMovingAverageAbsMaxOp operator is used in static quantization:
 
 $$scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)$$
 $$range = 2^{bit\_length - 1} - 1$$
 $$Out = round(X/scale * range)$$
 
+The FakeQuantDequantMovingAverageAbsMaxOp operator performs the moving_average_abs_max quantization and then dequantizes the result:
+
+$$scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)$$
+$$range = 2^{bit\_length - 1} - 1$$
+$$Out = round(X/scale * range) * scale / range$$
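+
+For example, with bit_length = 8 the range is 127, so an input x = 0.5 with
+scale = 1.0 becomes round(0.5 * 127) * 1.0 / 127 = 64 / 127 (about 0.504).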
+
 )DOC");
   }
 };
@@ -477,11 +503,21 @@ REGISTER_OP_CPU_KERNEL(fake_quantize_range_abs_max,
                        ops::FakeQuantizeRangeAbsMaxKernel<CPU, float>);
 
 REGISTER_OPERATOR(fake_quantize_moving_average_abs_max,
-                  ops::FakeQuantizeMovingAverageAbsMaxOp,
-                  ops::FakeQuantizeMovingAverageAbsMaxOpMaker,
+                  ops::FakeQuantOrWithDequantMovingAverageAbsMaxOp,
+                  ops::FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker,
                   paddle::framework::EmptyGradOpMaker);
 
 REGISTER_OP_CPU_KERNEL(fake_quantize_moving_average_abs_max,
                        ops::FakeQuantizeMovingAverageAbsMaxKernel<CPU, float>);
 
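+// The quantize-dequantize variant registers the same Op and OpMaker classes;
+// only the operator name and the bound CPU kernel differ.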
+REGISTER_OPERATOR(fake_quantize_dequantize_moving_average_abs_max,
+                  ops::FakeQuantOrWithDequantMovingAverageAbsMaxOp,
+                  ops::FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    fake_quantize_dequantize_moving_average_abs_max,
+    ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel<CPU, float>);
+
 REGISTER_OPERATOR(fake_channel_wise_quantize_abs_max,
                   ops::FakeChannelWiseQuantizeAbsMaxOp,
                   ops::FakeChannelWiseQuantizeAbsMaxOpMaker,