|
|
|
@ -14,6 +14,7 @@
|
|
|
|
|
|
|
|
|
|
#include "paddle/fluid/inference/api/mkldnn_quantizer.h"
|
|
|
|
|
#include <algorithm>
|
|
|
|
|
#include <limits>
|
|
|
|
|
#include <map>
|
|
|
|
|
#include <numeric>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
@ -37,6 +38,7 @@ using framework::ir::Graph;
|
|
|
|
|
using ConstEigenVectorArrayMap =
|
|
|
|
|
Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
|
|
|
|
|
using string::PrettyLogH1;
|
|
|
|
|
static LoDTensor CreateScaleTensor(int64_t channels_num = 1);
|
|
|
|
|
|
|
|
|
|
bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
|
|
|
|
|
PrettyLogH1("--- Calculating scales for quantization");
|
|
|
|
@ -52,7 +54,7 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
|
|
|
|
|
for (auto const& conn : connections) {
|
|
|
|
|
for (const auto& var_name : conn.second) {
|
|
|
|
|
// skip if scale already computed
|
|
|
|
|
if (scales_.find(var_name) != scales_.end()) return;
|
|
|
|
|
if (scales_.find(var_name) != scales_.end()) continue;
|
|
|
|
|
|
|
|
|
|
auto* var = predictor_.sub_scope_->FindVar(var_name);
|
|
|
|
|
PADDLE_ENFORCE(var, "%s is not in the scope", var_name);
|
|
|
|
@ -62,29 +64,49 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
|
|
|
|
|
|
|
|
|
|
// force unsigned type if already know it
|
|
|
|
|
bool is_unsigned = false;
|
|
|
|
|
if (is_output && op->Type() == "conv2d") {
|
|
|
|
|
// output of conv2d with relu must be unsigned
|
|
|
|
|
is_unsigned = (op->HasAttr("fuse_relu") &&
|
|
|
|
|
boost::get<bool>(op->GetAttr("fuse_relu"))) ||
|
|
|
|
|
(op->HasAttr("fuse_brelu") &&
|
|
|
|
|
boost::get<bool>(op->GetAttr("fuse_brelu")));
|
|
|
|
|
} else if (is_output && op->Type() == "relu") {
|
|
|
|
|
is_unsigned = true;
|
|
|
|
|
} else if (is_output &&
|
|
|
|
|
(op->Type() == "pool2d" || op->Type() == "transpose2" ||
|
|
|
|
|
op->Type() == "reshape2" || op->Type() == "concat")) {
|
|
|
|
|
// output of ops with unsigned input must be unsigned
|
|
|
|
|
is_unsigned = true;
|
|
|
|
|
for (auto input_var_name : op->Input("X")) {
|
|
|
|
|
bool compute_scale = true;
|
|
|
|
|
if (is_output) {
|
|
|
|
|
if (op->Type() == "conv2d") {
|
|
|
|
|
// output of conv2d with relu must be unsigned
|
|
|
|
|
is_unsigned = (op->HasAttr("fuse_relu") &&
|
|
|
|
|
boost::get<bool>(op->GetAttr("fuse_relu"))) ||
|
|
|
|
|
(op->HasAttr("fuse_brelu") &&
|
|
|
|
|
boost::get<bool>(op->GetAttr("fuse_brelu")));
|
|
|
|
|
} else if (op->Type() == "relu") {
|
|
|
|
|
is_unsigned = true;
|
|
|
|
|
} else if (op->Type() == "transpose2" ||
|
|
|
|
|
op->Type() == "reshape2" || op->Type() == "pool2d") {
|
|
|
|
|
auto input_var_name = op->Input("X")[0];
|
|
|
|
|
PADDLE_ENFORCE(scales_.find(input_var_name) != scales_.end(),
|
|
|
|
|
"Input scales must be calculated before the "
|
|
|
|
|
"output scales to infer if output is unsigned.");
|
|
|
|
|
is_unsigned = is_unsigned && scales_[input_var_name].first;
|
|
|
|
|
if (scales_.find(input_var_name) != scales_.end()) {
|
|
|
|
|
scales_[var_name] = scales_[input_var_name];
|
|
|
|
|
}
|
|
|
|
|
compute_scale = false;
|
|
|
|
|
} else if (op->Type() == "concat") {
|
|
|
|
|
// output of ops with unsigned input must be unsigned
|
|
|
|
|
is_unsigned = true;
|
|
|
|
|
double min_scale = std::numeric_limits<double>::max();
|
|
|
|
|
for (auto input_var_name : op->Input("X")) {
|
|
|
|
|
PADDLE_ENFORCE(
|
|
|
|
|
scales_.find(input_var_name) != scales_.end(),
|
|
|
|
|
"Input scales must be calculated before the "
|
|
|
|
|
"output scales to infer if output is unsigned.");
|
|
|
|
|
is_unsigned = is_unsigned && scales_[input_var_name].first;
|
|
|
|
|
min_scale = std::min(
|
|
|
|
|
min_scale,
|
|
|
|
|
scales_[input_var_name].second.data<double>()[0]);
|
|
|
|
|
}
|
|
|
|
|
auto scale_tensor = CreateScaleTensor();
|
|
|
|
|
scale_tensor.data<double>()[0] = min_scale;
|
|
|
|
|
scales_[var_name] = {is_unsigned, scale_tensor};
|
|
|
|
|
compute_scale = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
CalculateSingleScale(op->Type(), conn.first, var_name, *var_tensor,
|
|
|
|
|
is_unsigned);
|
|
|
|
|
if (compute_scale)
|
|
|
|
|
CalculateSingleScale(op->Type(), conn.first, var_name,
|
|
|
|
|
*var_tensor, is_unsigned);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
@ -127,6 +149,13 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateSingleScale(
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static LoDTensor CreateScaleTensor(int64_t channels_num) {
|
|
|
|
|
LoDTensor scale_tensor;
|
|
|
|
|
scale_tensor.Resize({channels_num});
|
|
|
|
|
scale_tensor.mutable_data<double>(CPUPlace());
|
|
|
|
|
return scale_tensor;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<int> AnalysisPredictor::MkldnnQuantizer::ExpandQuantizedBins(
|
|
|
|
|
std::vector<int> quantized_bins, std::vector<int> reference_bins) const {
|
|
|
|
|
std::vector<int> expanded_quantized_bins(reference_bins.size(), 0);
|
|
|
|
@ -263,11 +292,8 @@ AnalysisPredictor::MkldnnQuantizer::GetKLScalingFactor(
|
|
|
|
|
min_kl_index = starting_iter;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LoDTensor scale_tensor;
|
|
|
|
|
scale_tensor.Resize({1});
|
|
|
|
|
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
|
|
|
|
|
|
|
|
|
|
scale_ptr[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
|
|
|
|
|
LoDTensor scale_tensor = CreateScaleTensor();
|
|
|
|
|
scale_tensor.data<double>()[0] = 1.0 / ((min_kl_index + 0.5) * bin_width);
|
|
|
|
|
|
|
|
|
|
return std::make_pair(is_unsigned, scale_tensor);
|
|
|
|
|
}
|
|
|
|
@ -285,10 +311,8 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxScalingFactor(
|
|
|
|
|
"Tensor is claimed to be unsigned, but its min value (%f) is < 0.0",
|
|
|
|
|
min_val);
|
|
|
|
|
|
|
|
|
|
LoDTensor scale_tensor;
|
|
|
|
|
scale_tensor.Resize({1});
|
|
|
|
|
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
|
|
|
|
|
scale_ptr[0] = 1.0 / max_abs;
|
|
|
|
|
LoDTensor scale_tensor = CreateScaleTensor();
|
|
|
|
|
scale_tensor.data<double>()[0] = 1.0 / max_abs;
|
|
|
|
|
|
|
|
|
|
return std::make_pair(is_unsigned, scale_tensor);
|
|
|
|
|
}
|
|
|
|
@ -308,8 +332,7 @@ AnalysisPredictor::MkldnnQuantizer::GetMaxChScalingFactor(
|
|
|
|
|
min_val);
|
|
|
|
|
|
|
|
|
|
int channels = var_tensor.dims()[0];
|
|
|
|
|
LoDTensor scale_tensor;
|
|
|
|
|
scale_tensor.Resize({channels});
|
|
|
|
|
LoDTensor scale_tensor = CreateScaleTensor(channels);
|
|
|
|
|
auto* scale_ptr = scale_tensor.mutable_data<double>(CPUPlace());
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < channels; ++i) {
|
|
|
|
|