Add quantization of multi_gru op and tests (#28615)

musl/disable_test_yolov3_temporarily
Wojciech Uss 5 years ago committed by GitHub
parent 4adddcc89a
commit 4fd4095d1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -2645,6 +2645,20 @@ PDNode *patterns::MultiGruSeq::operator()() {
return h2;
}
PDNode *patterns::MultiGru::operator()() {
  // Anchor nodes of the pattern: the multi_gru op itself, its X input, the
  // first WeightX/WeightH inputs and its Hidden output. Creation order is
  // kept as: x, gru, wx, wh, h.
  auto x = pattern->NewNode(x_repr());
  x->AsInput()->assert_is_op_input("multi_gru", "X");

  auto gru = pattern->NewNode(gru_repr());
  gru->assert_is_op("multi_gru");

  auto wx = pattern->NewNode(wx_repr());
  wx->AsInput()->assert_is_op_nth_input("multi_gru", "WeightX", 0);

  auto wh = pattern->NewNode(wh_repr());
  wh->AsInput()->assert_is_op_nth_input("multi_gru", "WeightH", 0);

  auto h = pattern->NewNode(h_repr());
  h->AsOutput()->assert_is_op_output("multi_gru", "Hidden");

  // Wire inputs into the op and the op into its output; the Hidden node is
  // the pattern's result.
  gru->LinksFrom({x, wx, wh}).LinksTo({h});
  return h;
}
} // namespace ir
} // namespace framework
} // namespace paddle

@ -1490,6 +1490,21 @@ struct MultiGruSeq : public PatternBase {
PATTERN_DECL_NODE(h2);
};
// multi_gru op
// Quantization pass for multi_gru op.
// operator() matches a single multi_gru op (with its X input, first
// WeightX/WeightH inputs and Hidden output) and returns the Hidden node.
struct MultiGru : public PatternBase {
  MultiGru(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "multi_gru") {}
  PDNode* operator()();
  PATTERN_DECL_NODE(x);    // X input of multi_gru
  PATTERN_DECL_NODE(gru);  // the multi_gru op node
  PATTERN_DECL_NODE(wx);   // WeightX input at index 0
  PATTERN_DECL_NODE(wh);   // WeightH input at index 0
  PATTERN_DECL_NODE(h);    // Hidden output
};
} // namespace patterns
// Link two ir::Nodes from each other.

File diff suppressed because it is too large Load Diff

@ -18,6 +18,7 @@
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
@ -58,6 +59,7 @@ class CPUQuantizePass : public FusePassBase {
void QuantizeMatmul(Graph* graph) const;
void QuantizeElementwiseAdd(Graph* graph) const;
void QuantizeFusionGru(Graph* graph) const;
void QuantizeMultiGru(Graph* graph) const;
void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
double scale_to_one, bool is_input_unsigned,
@ -75,10 +77,14 @@ class CPUQuantizePass : public FusePassBase {
bool is_unsigned,
std::string scale_attr_name = "") const;
bool AreScalesPresentForNodes(const Node* op_node,
std::initializer_list<Node*> nodes) const;
bool AreScalesPresentForVarNames(std::vector<std::string> names) const;
bool AreScalesPresentForNodes(std::initializer_list<Node*> nodes) const;
std::pair<bool, LoDTensor> GetScaleDataByName(const std::string& name) const;
std::pair<bool, LoDTensor> GetScaleDataForNode(const Node* node) const;
LoDTensor GetScaleTensorByName(const std::string& name) const;
LoDTensor GetScaleTensorForNode(const Node* node) const;
double GetScaleValueByName(const std::string& name,
bool* is_unsigned = nullptr) const;
double GetScaleValueForNode(const Node* node,
bool* is_unsigned = nullptr) const;
bool IsOpDequantized(const Node* node) const;

@ -112,7 +112,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
}
void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
const std::initializer_list<std::string> variable_names,
const std::vector<std::string> variable_names,
int* original_nodes_num, int* current_nodes_num,
std::string var_without_scale = "",
std::string var_signed = "") {
@ -402,7 +402,7 @@ TEST(CpuQuantizePass, transpose) {
static const std::initializer_list<std::string> variable_names_fusion_gru = {
"x", "wx", "wh", "b", "h"};
// x->Fusion_gru->h
// (x, wx, wh, b)->Fusion_gru->h
ProgramDesc BuildProgramDescFusionGru() {
ProgramDesc prog;
for (auto& v : variable_names_transpose) {
@ -460,7 +460,7 @@ void MainTestFusionGru(const ProgramDesc& prog, int gru_count, int quant_count,
}
TEST(CpuQuantizePass, fusion_gru) {
// x->Fusion_gru->h
// (x, wx, wh, b)->Fusion_gru->h
int gru_count = 1;
int quant_count = 1;
int dequant_count = 0;
@ -470,6 +470,128 @@ TEST(CpuQuantizePass, fusion_gru) {
dequant_count, added_nodes_count, 2. * 127, 128.);
}
// Creates `number` variables named `prefix0` .. `prefix{number-1}` in block 0
// of `prog` and returns their names in creation order.
const std::vector<std::string> churn_out_vars(ProgramDesc* prog,
                                              const std::string& prefix,
                                              int number) {
  auto v = std::vector<std::string>();
  v.reserve(number);  // size is known up front; avoid reallocations
  for (int i = 0; i < number; ++i) {
    auto name = prefix + std::to_string(i);
    prog->MutableBlock(0)->Var(name);
    v.push_back(std::move(name));  // move instead of copying the string
  }
  return v;
}
// Registers each name in `names` as a variable in block 0 of `prog`.
void create_vars(ProgramDesc* prog,
                 const std::initializer_list<std::string>& names) {
  // Iterate by const reference to avoid copying every std::string.
  for (const auto& name : names) prog->MutableBlock(0)->Var(name);
}
// Appends a "multi_gru" op to block 0 of `prog`, configured for int8 mkldnn
// execution with `layers` stacked layers. The weight/bias name vectors are
// taken by const reference (previously by value) to avoid copying them on
// every call; callers are unaffected.
void SetMultiGruOp(ProgramDesc* prog, const std::string& x,
                   const std::vector<std::string>& wx,
                   const std::vector<std::string>& wh,
                   const std::vector<std::string>& b, const std::string& h,
                   int layers) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType("multi_gru");
  op->SetInput("X", {x});
  op->SetInput("WeightX", wx);
  op->SetInput("WeightH", wh);
  op->SetInput("Bias", b);
  op->SetOutput("Hidden", {h});
  op->SetAttr("layers", layers);
  op->SetAttr("origin_mode", false);
  op->SetAttr("use_mkldnn", true);
  op->SetAttr("name", std::string("Multi_gru"));
  op->SetAttr("mkldnn_data_type", std::string("int8"));
  // Placeholder scales; the quantize pass is expected to overwrite them.
  op->SetAttr("Scale_data", 1.0f);
  op->SetAttr("Shift_data", 0.0f);
}
// Builds a program with one int8 multi_gru op of `layers` layers, runs the
// quantization pass on it, and verifies the resulting graph: the attributes
// of the quantized multi_gru op, the number of quantize/dequantize ops, and
// the total node-count delta introduced by the pass.
void MainTestMultiGru(int layers) {
  ProgramDesc prog;
  // Register the I/O variables plus 2 * layers weight/bias variables each.
  create_vars(&prog, {"x", "h"});
  const std::vector<std::string> wx = churn_out_vars(&prog, "wx", 2 * layers);
  const std::vector<std::string> wh = churn_out_vars(&prog, "wh", 2 * layers);
  const std::vector<std::string> b = churn_out_vars(&prog, "b", 2 * layers);

  std::vector<std::string> all_vars;
  all_vars.reserve(wx.size() + wh.size() + b.size() + 2);
  all_vars.insert(all_vars.end(), wx.begin(), wx.end());
  all_vars.insert(all_vars.end(), wh.begin(), wh.end());
  all_vars.insert(all_vars.end(), b.begin(), b.end());
  all_vars.push_back("x");
  all_vars.push_back("h");

  // Prepare the program descriptor and run the pass.
  SetMultiGruOp(&prog, "x", wx, wh, b, "h", layers);
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  int original_nodes_num, current_nodes_num;
  PreparePass(&graph, prog, all_vars, &original_nodes_num, &current_nodes_num);

  // Walk the quantized graph, checking multi_gru attributes and counting
  // the op kinds the pass may have inserted.
  float scale = 2 * 127;
  float shift = 128;
  int found_quantize = 0;
  int found_dequantize = 0;
  int found_multi_gru = 0;
  for (auto* node : graph->Nodes()) {
    if (!node->IsOp()) continue;
    auto* op = node->Op();
    if (op->Type() == "quantize") {
      ++found_quantize;
    } else if (op->Type() == "dequantize") {
      ++found_dequantize;
    } else if (op->Type() == "multi_gru") {
      ++found_multi_gru;
      auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name"));
      EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_data")), scale)
          << "Scale_data for node '" + op_name + "'.";
      EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Shift_data")), shift)
          << "Shift_data for node '" + op_name + "'.";
      EXPECT_EQ(op->Input("Scale_weights").size(), 2u * layers)
          << "Scale_weights for node '" + op_name + "'.";
      EXPECT_EQ(BOOST_GET_CONST(bool, op->GetAttr("force_fp32_output")), true)
          << "force_fp32_output for node '" + op_name + "'.";
    }
  }

  // Expected: one multi_gru, one quantize op (+ its output var), one
  // weight-scale var per WeightX/WeightH pair, and no dequantize ops.
  int multi_gru_count = 1;
  int quant_count = 1;
  int quant_out_count = 1;
  int dequant_count = 0;
  int dequant_out_count = 0;
  int scale_weights_count = 2 * layers;
  int added_nodes_count = quant_count + quant_out_count + scale_weights_count +
                          dequant_count + dequant_out_count;
  EXPECT_EQ(found_multi_gru, multi_gru_count);
  EXPECT_EQ(found_quantize, quant_count);
  EXPECT_EQ(found_dequantize, dequant_count);
  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
// Exercise the pass with 1, 2 and 3 stacked multi_gru layers.
TEST(CpuQuantizePass, multi_gru_1) { MainTestMultiGru(/*layers=*/1); }

TEST(CpuQuantizePass, multi_gru_2) { MainTestMultiGru(/*layers=*/2); }

TEST(CpuQuantizePass, multi_gru_3) { MainTestMultiGru(/*layers=*/3); }
static const std::initializer_list<std::string> variable_names_reshape = {
"a", "w1", "b", "c", "d", "e", "f"};

@ -66,7 +66,7 @@ class Quant2Int8MkldnnPass(object):
self._fc_ops = ['fc']
self._relu_ops = ['relu', 'relu6']
self._matmul_ops = ['matmul']
self._gru_ops = ['fusion_gru']
self._gru_ops = ['fusion_gru', 'multi_gru']
self._weight_scales = {}
# Collect the Input and Output scales from Fake quant models
self._var_quant_scales = {}
@ -352,6 +352,8 @@ class Quant2Int8MkldnnPass(object):
graph = self._apply_pass(graph, 'mul_lstm_fuse_pass')
graph = self._apply_pass(graph, 'fc_gru_fuse_pass')
graph = self._apply_pass(graph, 'mul_gru_fuse_pass')
graph = self._apply_pass(graph, 'multi_gru_fuse_pass')
graph = self._apply_pass(graph, 'multi_gru_seq_fuse_pass')
graph = self._apply_pass(graph, 'seq_concat_fc_fuse_pass')
graph = self._apply_pass(graph, 'squared_mat_sub_fuse_pass')
graph = self._apply_pass(graph, 'is_test_pass')
@ -450,38 +452,46 @@ class Quant2Int8MkldnnPass(object):
self._var_quant_scales[weight_var_name] = (use_unsigned_int,
lod_tensor)
def _compute_single_gru_weight_scales(wx_var_name, wh_var_name):
    # Computes per-output-channel quantization scales for one pair of GRU
    # weight tensors (WeightX, WeightH) and returns them as a LoDTensor.
    # `self` and `self._scope` are captured from the enclosing method.
    wx = np.array(self._load_param(self._scope, wx_var_name))
    wh = np.array(self._load_param(self._scope, wh_var_name))
    # OC: number of output channels, taken from WeightH's first dim.
    OC = wh.shape[0]
    # Scales for the first 2*OC channels: the first 2*OC columns of wx
    # stacked with the first 2*OC*OC elements of wh reshaped to
    # (OC, 2*OC). Presumably these are the update/reset gate weights —
    # TODO confirm against the multi_gru kernel's weight layout.
    scale_ur = 1.0 / np.max(np.abs(
        np.concatenate(
            [
                wx[:, :2 * OC], wh.flatten()[:2 * OC * OC].reshape(OC, 2
                                                                   * OC)
            ],
            axis=0)),
        axis=0)
    # Scales for the remaining OC channels: the trailing OC columns of wx
    # stacked with the remaining OC*OC elements of wh reshaped to (OC, OC).
    scale_o = 1.0 / np.max(np.abs(
        np.concatenate(
            [
                wx[:, 2 * OC:], wh.flatten()[2 * OC * OC:].reshape(OC,
                                                                   OC)
            ],
            axis=0)),
        axis=0)
    # Concatenate into one 3*OC-long scale vector (float64 via 'float').
    gru_weights_scale = np.concatenate([scale_ur,
                                        scale_o]).astype('float')
    return self._convert_scale2tensor(gru_weights_scale)
def _compute_gru_weight_scales(wx_name, wh_name):
    # Computes and records weight scales for every op whose type is in
    # self._gru_ops. An op may carry several WeightX/WeightH input pairs
    # (multi_gru has one pair per layer/direction), so scales are computed
    # pairwise and stored in self._var_quant_scales under the WeightX
    # variable name. `graph` and `self` are captured from the enclosing
    # method.
    #
    # NOTE(review): the diffed body contained both the old inline
    # single-pair implementation and the new delegating loop concatenated;
    # only the new loop (which handles multiple weight inputs via
    # _compute_single_gru_weight_scales) is kept here.
    for op in graph.all_op_nodes():
        if op.op().type() in self._gru_ops:
            assert len(op.input(wx_name)) == len(
                op.input(wh_name)
            ), 'Mismatch in number of weights inputs ({} for WeightX vs. {} for WeightH).'.format(
                len(op.input(wx_name)), len(op.input(wh_name)))
            for i, wx_var_name in enumerate(op.input(wx_name)):
                wh_var_name = op.input(wh_name)[i]
                use_unsigned_int = False
                lod_tensor = _compute_single_gru_weight_scales(
                    wx_var_name, wh_var_name)
                self._var_quant_scales[wx_var_name] = (use_unsigned_int,
                                                       lod_tensor)
_compute_var_scales(self._conv_ops, "Filter", axis=1)
_compute_var_scales(self._fc_ops, "W", axis=0)

@ -239,7 +239,7 @@ if(LINUX AND WITH_MKLDNN)
set(QUANT2_GRU_MODEL_ARCHIVE "GRU_quant_acc.tar.gz")
set(QUANT2_GRU_MODEL_DIR "${QUANT_INSTALL_DIR}/GRU_quant2")
download_quant_model(${QUANT2_GRU_MODEL_DIR} ${QUANT2_GRU_MODEL_ARCHIVE})
set(QUANT2_GRU_OPS_TO_QUANTIZE "fusion_gru")
set(QUANT2_GRU_OPS_TO_QUANTIZE "multi_gru")
### Save FP32 model or INT8 model from Quant model

Loading…
Cancel
Save