parent
c49e604906
commit
22715487dc
@ -0,0 +1,239 @@
|
||||
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/framework/ir/cpu_quantize_pass.h"

#include <algorithm>
#include <sstream>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/string/pretty_log.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
namespace {
|
||||
|
||||
void UnlinkNodes(ir::Node* a, ir::Node* b) {
|
||||
a->outputs.erase(std::remove(a->outputs.begin(), a->outputs.end(), b),
|
||||
a->outputs.end());
|
||||
b->inputs.erase(std::remove(b->inputs.begin(), b->inputs.end(), a),
|
||||
b->inputs.end());
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
enum { U8_MAX = 255, S8_MAX = 127 };
|
||||
|
||||
using EigenVectorArrayMap = Eigen::Map<Eigen::Array<double, Eigen::Dynamic, 1>>;
|
||||
using string::PrettyLogDetail;
|
||||
|
||||
// Inserts a "quantize" op between `input` and `op`, so that `op` reads a
// quantized version of `input`. The quantize op's scale is derived from
// `scale_to_one` (which maps the variable to MAX=1.0) multiplied by the
// integer range. If `scale_attr_name` is non-empty, the computed scale is
// also stored as an attribute of `op` under that name.
void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
                                    std::string input_name, double scale_to_one,
                                    bool is_unsigned,
                                    std::string scale_attr_name) const {
  // Range depends on signedness: 255 for u8, 127 for s8.
  unsigned max = is_unsigned ? U8_MAX : S8_MAX;
  float scale = scale_to_one * max;

  // Create quantize output variable
  VarDesc quantize_out_desc(patterns::PDNodeName("quantize", "out"));
  auto* quantize_out_node = g->CreateVarNode(&quantize_out_desc);

  // create a quantize op node
  OpDesc q_desc;
  q_desc.SetType("quantize");
  q_desc.SetInput("Input", std::vector<std::string>({input->Name()}));
  q_desc.SetOutput("Output",
                   std::vector<std::string>({quantize_out_node->Name()}));
  q_desc.SetAttr("Scale", scale);
  q_desc.SetAttr("is_negative_input", !is_unsigned);
  auto quantize_op = g->CreateOpNode(&q_desc);  // OpDesc will be copied.

  // update op's input: `op` now reads the quantized variable instead of the
  // original one.
  op->Op()->SetInput(input_name,
                     std::vector<std::string>({quantize_out_node->Name()}));

  // link quantize op: input -> quantize -> quantize_out -> op
  UnlinkNodes(input, op);
  IR_NODE_LINK_TO(input, quantize_op);
  IR_NODE_LINK_TO(quantize_op, quantize_out_node);
  IR_NODE_LINK_TO(quantize_out_node, op);

  if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
}
|
||||
|
||||
// Inserts a "dequantize" op between `op` and `output`, so that `op` writes an
// intermediate quantized variable which is then dequantized into `output`.
// If `scale_attr_name` is non-empty, the computed scale is also stored as an
// attribute of `op` under that name.
void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
                                       std::string output_name,
                                       double scale_to_one, bool is_unsigned,
                                       std::string scale_attr_name) const {
  // Range depends on signedness: 255 for u8, 127 for s8.
  unsigned max = is_unsigned ? U8_MAX : S8_MAX;
  float scale = scale_to_one * max;

  // Create dequantize input variable
  VarDesc dequantize_in_desc(patterns::PDNodeName("dequantize", "in"));
  auto* dequantize_in_node = g->CreateVarNode(&dequantize_in_desc);

  // create a dequantize op node for output.
  OpDesc deq_desc;
  deq_desc.SetType("dequantize");
  deq_desc.SetInput("Input",
                    std::vector<std::string>({dequantize_in_node->Name()}));
  deq_desc.SetOutput("Output", std::vector<std::string>({output->Name()}));
  deq_desc.SetAttr("Scale", scale);
  auto dequantize_op = g->CreateOpNode(&deq_desc);  // OpDesc will be copied.

  // update op's output: `op` now writes the intermediate quantized variable.
  op->Op()->SetOutput(output_name,
                      std::vector<std::string>({dequantize_in_node->Name()}));

  // link dequantize op: op -> dequantize_in -> dequantize -> output
  UnlinkNodes(op, output);
  IR_NODE_LINK_TO(op, dequantize_in_node);
  IR_NODE_LINK_TO(dequantize_in_node, dequantize_op);
  IR_NODE_LINK_TO(dequantize_op, output);

  if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
}
|
||||
|
||||
// Quantizes conv2d ops matched by the ConvResidual pattern: wraps the Input
// (and, when `with_residual_data` is set, the ResidualData input) with a
// quantize op, wraps the Output with a dequantize op, and stores per-channel
// filter scales in the Scale_weights attribute. Only ops whose use_quantizer
// attribute is true are processed.
void CPUQuantizePass::QuantizeConv(Graph* graph,
                                   bool with_residual_data) const {
  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();
  patterns::ConvResidual conv_pattern{pattern, name_scope_};
  conv_pattern(with_residual_data);

  int quantize_conv_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "Quantize conv2d op";
    GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
    auto* conv_op_desc = conv_op->Op();

    // skip if should not be quantized
    if (!conv_op_desc->HasAttr("use_quantizer") ||
        !boost::get<bool>(conv_op_desc->GetAttr("use_quantizer")))
      return;

    GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);

    // get scales calculated after warmup, they scale variables to MAX=1.0
    // NOTE(review): `scales` is taken by value; however, if LoDTensor copies
    // share the underlying buffer, the in-place `eigen_tensor *=` below may
    // still modify the stored scale tensor — confirm before relying on the
    // pass attribute being unchanged across handler invocations.
    auto scales = Get<VarQuantScale>("quant_var_scales");

    auto input_scale = scales[conv_input->Name()].second.data<double>()[0];
    bool is_input_unsigned = scales[conv_input->Name()].first;
    QuantizeInput(g, conv_op, conv_input, "Input", input_scale,
                  is_input_unsigned, "Scale_in");

    // Scale the per-output-channel filter scales up to the s8 range in place,
    // then export them as a float vector attribute.
    auto filter_scale_tensor = scales[conv_filter->Name()].second;
    EigenVectorArrayMap eigen_tensor{filter_scale_tensor.data<double>(),
                                     filter_scale_tensor.numel(), 1};
    eigen_tensor *= static_cast<double>(S8_MAX);
    std::vector<float> filter_scale{
        filter_scale_tensor.data<double>(),
        filter_scale_tensor.data<double>() + filter_scale_tensor.numel()};

    conv_op->Op()->SetAttr("Scale_weights", filter_scale);

    if (with_residual_data) {
      GET_IR_NODE_FROM_SUBGRAPH(conv_residual_data, conv_residual_data,
                                conv_pattern);
      auto residual_scale =
          scales[conv_residual_data->Name()].second.data<double>()[0];
      bool is_residual_unsigned = scales[conv_residual_data->Name()].first;

      QuantizeInput(g, conv_op, conv_residual_data, "ResidualData",
                    residual_scale, is_residual_unsigned, "Scale_in_eltwise");
    }

    auto output_scale = scales[conv_output->Name()].second.data<double>()[0];
    bool is_output_unsigned = scales[conv_output->Name()].first;
    DequantizeOutput(g, conv_op, conv_output, "Output", output_scale,
                     is_output_unsigned, "Scale_out");

    ++quantize_conv_count;
  };

  gpd(graph, handler);
  AddStatis(quantize_conv_count);

  std::stringstream msg_ss;
  msg_ss << "--- quantized " << quantize_conv_count << " conv2d ops";
  if (with_residual_data) msg_ss << " with residual connection";
  PrettyLogDetail(msg_ss.str().c_str());
}
|
||||
|
||||
// Quantizes pool2d ops marked with use_quantizer: wraps the input X with a
// quantize op and the output Out with a dequantize op. No scale attributes
// are written to the pool op itself.
void CPUQuantizePass::QuantizePool(Graph* graph) const {
  GraphPatternDetector gpd;
  auto pattern = gpd.mutable_pattern();
  patterns::Pool pool_pattern{pattern, name_scope_};
  pool_pattern();

  int quantize_pool_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "Quantize pool2d op";
    GET_IR_NODE_FROM_SUBGRAPH(pool_op, pool_op, pool_pattern);
    auto* pool_op_desc = pool_op->Op();

    // skip if should not be quantized
    if (!pool_op_desc->HasAttr("use_quantizer") ||
        !boost::get<bool>(pool_op_desc->GetAttr("use_quantizer")))
      return;

    GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);

    // get scales calculated after warmup, they scale variables to MAX=1.0
    auto scales = Get<VarQuantScale>("quant_var_scales");

    auto input_scale = scales[pool_input->Name()].second.data<double>()[0];
    bool is_input_unsigned = scales[pool_input->Name()].first;
    QuantizeInput(g, pool_op, pool_input, "X", input_scale, is_input_unsigned);

    auto output_scale = scales[pool_output->Name()].second.data<double>()[0];
    bool is_output_unsigned = scales[pool_output->Name()].first;
    DequantizeOutput(g, pool_op, pool_output, "Out", output_scale,
                     is_output_unsigned);

    ++quantize_pool_count;
  };

  gpd(graph, handler);
  AddStatis(quantize_pool_count);

  PrettyLogDetail("--- quantized %d pool2d ops", quantize_pool_count);
}
|
||||
|
||||
// Pass entry point: validates preconditions, then quantizes all supported
// operators (conv2d with and without residual data, and pool2d).
std::unique_ptr<ir::Graph> CPUQuantizePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  VLOG(3) << "Quantizing the graph.";
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init(name_scope_, graph.get());

  // A parameter scope must have been attached to the graph before this pass.
  PADDLE_ENFORCE(param_scope());

  // NOTE(review): residual-data convs are handled before plain convs —
  // ordering presumably matters for pattern matching; confirm before
  // reordering these calls.
  QuantizeConv(graph.get(), true /* with_residual_data */);
  QuantizeConv(graph.get());
  QuantizePool(graph.get());

  return graph;
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
REGISTER_PASS(cpu_quantize_pass, paddle::framework::ir::CPUQuantizePass)
|
||||
.RequirePassAttr("quant_var_scales");
|
@ -0,0 +1,66 @@
|
||||
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
|
||||
#include "paddle/fluid/framework/ir/graph.h"
|
||||
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
/*
|
||||
* Map variable name to tensor of scaling factors scaling it to MAX=1.0.
|
||||
* bool denotes whether quantization of the variable should be done to unsigned
|
||||
* type.
|
||||
*/
|
||||
using VarQuantScale =
|
||||
std::unordered_map<std::string, std::pair<bool, LoDTensor>>;
|
||||
|
||||
/*
 * Quantize all supported operators (currently conv2d and pool2d) by wrapping
 * them with quantize/dequantize ops, using scales calculated after warmup.
 */
class CPUQuantizePass : public FusePassBase {
 public:
  virtual ~CPUQuantizePass() {}

 protected:
  // Pass entry point: quantizes all supported ops in the graph.
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;

  // Quantizes conv2d ops; `with_residual_data` selects the variant of the
  // ConvResidual pattern to match.
  void QuantizeConv(Graph* graph, bool with_residual_data = false) const;

  // Quantizes pool2d ops marked with the use_quantizer attribute.
  void QuantizePool(Graph* graph) const;

  // Inserts a quantize op before `op`'s input `input_name`; when
  // `scale_attr_name` is non-empty the computed scale is stored on `op`.
  void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                     double scale_to_one, bool is_unsigned,
                     std::string scale_attr_name = "") const;

  // Inserts a dequantize op after `op`'s output `output_name`; when
  // `scale_attr_name` is non-empty the computed scale is stored on `op`.
  void DequantizeOutput(Graph* g, Node* op, Node* output,
                        std::string output_name, double scale_to_one,
                        bool is_unsigned,
                        std::string scale_attr_name = "") const;

  // Name scope used for pattern node names and fuse statistics.
  const std::string name_scope_{"quantize"};
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -0,0 +1,211 @@
|
||||
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/framework/ir/cpu_quantize_pass.h"
|
||||
#include <gtest/gtest.h>
|
||||
#include "paddle/fluid/framework/naive_executor.h"
|
||||
#include "paddle/fluid/platform/place.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
// Appends an op of the given `type` to block 0 of `prog`, wiring the provided
// input/output variable names to the argument names that op type expects.
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs, bool use_mkldnn,
           bool use_quantizer = false) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  op->SetAttr("use_mkldnn", use_mkldnn);
  op->SetAttr("name", name);
  if (type == "conv2d") {
    op->SetInput("Input", {inputs[0]});
    op->SetInput("Filter", {inputs[1]});
    // Optional 3rd input is the bias, optional 4th is residual data.
    if (inputs.size() > 2)
      op->SetInput("Bias", {inputs[2]});
    else
      op->SetInput("Bias", {});
    if (inputs.size() > 3) {
      op->SetInput("ResidualData", {inputs[3]});
      op->SetAttr("fuse_residual_connection", true);
    } else {
      op->SetInput("ResidualData", {});
      op->SetAttr("fuse_residual_connection", false);
    }
    op->SetOutput("Output", {outputs[0]});
    op->SetAttr("use_quantizer", use_quantizer);
    // Neutral initial scales; cpu_quantize_pass overwrites them when it
    // quantizes the op.
    op->SetAttr("Scale_in", 1.0f);
    op->SetAttr("Scale_out", 1.0f);
    op->SetAttr("Scale_weights", std::vector<float>{1.0f});
  } else if (type == "pool2d") {
    op->SetInput("X", {inputs[0]});
    op->SetOutput("Out", {outputs[0]});
    op->SetAttr("use_quantizer", use_quantizer);
  } else if (type == "dropout") {
    op->SetInput("X", {inputs[0]});
    op->SetOutput("Out", {outputs[0]});
  } else if (type == "fc") {
    op->SetInput("Input", {inputs[0]});
    if (inputs.size() > 1) op->SetInput("W", {inputs[1]});
    if (inputs.size() > 2) op->SetInput("Bias", {inputs[2]});
    op->SetOutput("Out", {outputs[0]});
  }
}
|
||||
|
||||
// All variable names used by the test program; "w*" are weights and "b*" are
// biases (both persistable).
static const std::initializer_list<std::string> variable_names{
    "a", "w1", "c", "d", "w2", "e", "f", "g",
    "h", "w3", "b1", "i", "j", "w4", "b2"};
// Topology built below:
//
// (a,w1)->Conv1->c and c->Pool1->d
//
// (d,w2)->Conv2->e and e->Pool2->f
//
// d->Dropout1->g and g->Fc1->h and (h,w3,b1,i)->Conv3->j
//
// (c,w4,b2)->Conv4->i
ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) {
  ProgramDesc prog;
  for (auto& v : variable_names) {
    auto* var = prog.MutableBlock(0)->Var(v);
    // Mark weights and biases as persistable model parameters.
    if (v.find("w") == 0 || v.find("b") == 0) {
      var->SetPersistable(true);
    }
  }

  SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"c"}, use_mkldnn,
        use_quantizer);
  SetOp(&prog, "pool2d", "Pool1", {"c"}, {"d"}, use_mkldnn, use_quantizer);

  SetOp(&prog, "conv2d", "Conv2", {"d", "w2"}, {"e"}, use_mkldnn,
        use_quantizer);
  SetOp(&prog, "pool2d", "Pool2", {"e"}, {"f"}, use_mkldnn, use_quantizer);

  SetOp(&prog, "dropout", "Dropout1", {"d"}, {"g"}, use_mkldnn);
  SetOp(&prog, "fc", "Fc1", {"g"}, {"h"}, use_mkldnn);
  SetOp(&prog, "conv2d", "Conv3", {"h", "w3", "b1", "i"}, {"j"}, use_mkldnn,
        use_quantizer);

  SetOp(&prog, "conv2d", "Conv4", {"c", "w4", "b2"}, {"i"}, use_mkldnn,
        use_quantizer);

  return prog;
}
|
||||
|
||||
// Creates variable `var_name` in `scope` and allocates a small FP32 buffer
// for it, so the variable is materialized before the pass runs.
void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
                      const char* var_name) {
  auto x = scope->Var(var_name);
  auto tensor = x->GetMutable<LoDTensor>();
  tensor->mutable_data(place, proto::VarType::FP32,
                       ::paddle::memory::Allocator::kDefault, 1);
}
|
||||
|
||||
// Runs cpu_quantize_pass on `prog` and verifies: the number of
// conv2d/pool2d/quantize/dequantize ops after the pass, the node-count delta
// (`added_nodes_count`), and the Scale_* attributes written to conv2d ops.
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
              int quant_count, int dequant_count, int added_nodes_count,
              float scale) {
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  // Init scope, as it is used in pass
  auto place = paddle::platform::CPUPlace();
  NaiveExecutor exe{place};
  Scope scope;
  exe.CreateVariables(prog, 0, true, &scope);

  // Scale map: every variable gets a single-element scale tensor of 2.0,
  // signed (first == false).
  // NOTE(review): ownership of `scales` is presumably transferred to the pass
  // via Set() below — confirm, since it is never deleted here.
  auto* scales = new VarQuantScale();

  for (auto& v : variable_names) {
    InitTensorHolder(&scope, place, v.c_str());
    LoDTensor tensor;
    tensor.Resize({1});
    auto* ptr = tensor.mutable_data<double>(place);
    ptr[0] = 2.0;

    (*scales)[v] = std::make_pair(false, std::move(tensor));
  }

  graph->Set(kParamScopeAttr, new framework::Scope*(&scope));

  auto pass = PassRegistry::Instance().Get("cpu_quantize_pass");
  pass->Set("quant_var_scales", scales);

  int original_nodes_num = graph->Nodes().size();

  graph = pass->Apply(std::move(graph));

  int current_nodes_num = graph->Nodes().size();

  // Count op nodes by type and check the scale attributes on conv2d ops.
  int quantize_nodes_count = 0;
  int dequantize_nodes_count = 0;
  int conv2d_nodes_count = 0;
  int pool2d_nodes_count = 0;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp()) {
      auto* op = node->Op();
      if (op->Type() == "conv2d") {
        conv2d_nodes_count++;
        auto op_name = boost::get<std::string>(op->GetAttr("name"));
        EXPECT_EQ(boost::get<float>(op->GetAttr("Scale_in")), scale)
            << "Scale_in for node '" + op_name + "'.";
        EXPECT_EQ(boost::get<float>(op->GetAttr("Scale_out")), scale)
            << "Scale_out for node '" + op_name + "'.";
        EXPECT_EQ(
            boost::get<std::vector<float>>(op->GetAttr("Scale_weights"))[0],
            scale)
            << "Scale_weights for node '" + op_name + "'.";
      } else if (op->Type() == "pool2d") {
        pool2d_nodes_count++;
      } else if (op->Type() == "quantize") {
        quantize_nodes_count++;
      } else if (op->Type() == "dequantize") {
        dequantize_nodes_count++;
      }
    }
  }
  EXPECT_EQ(conv2d_nodes_count, conv_count);
  EXPECT_EQ(pool2d_nodes_count, pool_count);
  EXPECT_EQ(quantize_nodes_count, quant_count);
  EXPECT_EQ(dequantize_nodes_count, dequant_count);
  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
|
||||
|
||||
// With quantization enabled on all conv2d/pool2d ops, checks the number of
// inserted quantize/dequantize ops and that every scale equals
// 2.0 * S8_MAX (input scale from MainTest times the signed-int8 range).
TEST(CpuQuantizePass, quantize) {
  bool use_mkldnn = true;
  bool use_quantizer = true;
  // (a->QUANT1->IN1,w1)->Conv1->OUT1->DEQUANT1->c and
  // c->QUANT2->IN2->Pool1->OUT2->DEQUANT2->d
  //
  // (d->QUANT3->IN3,w2)->Conv2->OUT3->DEQUANT3->e and
  // e->QUANT4->IN4->Pool2->OUT4->DEQUANT4->f
  //
  // d->Dropout1->g and g->Fc1->h and
  // (h->QUANT5->IN5,w3,b1,i->QUANT6->IN6)->Conv3->OUT5->DEQUANT5->j
  //
  // (c->QUANT7->IN7,w4,b2)->Conv4->OUT6->DEQUANT6->i
  // Insert nodes: 7 Quant + 7 IN + 6 OUT + 6 DEQUANT
  int added_nodes = 7 + 7 + 6 + 6;
  MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 7, 6, added_nodes,
           2.0f * 127);
}

// With use_quantizer == false everywhere, the pass must insert no
// quantize/dequantize ops and all scales keep their neutral value of 1.0.
TEST(CpuQuantizePass, do_not_quantize) {
  bool use_mkldnn = true;
  bool use_quantizer = false;
  int added_nodes = 0;
  MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 0, 0, added_nodes,
           1.0f);
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
USE_PASS(cpu_quantize_pass);
|
@ -0,0 +1,58 @@
|
||||
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h"
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
std::unique_ptr<ir::Graph> CPUQuantizePlacementPass::ApplyImpl(
|
||||
std::unique_ptr<ir::Graph> graph) const {
|
||||
VLOG(3) << "Marks operators which are to be quantized.";
|
||||
const auto& excluded_ids_list =
|
||||
Get<std::unordered_set<int>>("quantize_excluded_op_ids");
|
||||
const auto& op_types_list =
|
||||
Get<std::unordered_set<std::string>>("quantize_enabled_op_types");
|
||||
for (const Node* n : graph->Nodes()) {
|
||||
if (n->IsOp()) {
|
||||
if (std::find(excluded_ids_list.begin(), excluded_ids_list.end(),
|
||||
n->id()) != excluded_ids_list.end())
|
||||
continue;
|
||||
auto* op = n->Op();
|
||||
if (op->HasAttr("use_quantizer") || op->HasProtoAttr("use_quantizer")) {
|
||||
if (op_types_list.empty()) {
|
||||
op->SetAttr("use_quantizer", true);
|
||||
} else if (std::find(op_types_list.begin(), op_types_list.end(),
|
||||
n->Name()) != op_types_list.end()) {
|
||||
op->SetAttr("use_quantizer", true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return graph;
|
||||
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
REGISTER_PASS(cpu_quantize_placement_pass,
|
||||
paddle::framework::ir::CPUQuantizePlacementPass)
|
||||
// a vector of operator type names to be quantized ("conv2d" etc.)
|
||||
.RequirePassAttr("quantize_enabled_op_types")
|
||||
// a vector of operator ids that are to be excluded from quantization
|
||||
.RequirePassAttr("quantize_excluded_op_ids");
|
@ -0,0 +1,34 @@
|
||||
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "paddle/fluid/framework/ir/pass.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
/*
 * Specifies which operators should be quantized, by setting their
 * use_quantizer attribute to true. Controlled by the required pass
 * attributes "quantize_enabled_op_types" and "quantize_excluded_op_ids".
 */
class CPUQuantizePlacementPass : public Pass {
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -0,0 +1,129 @@
|
||||
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/framework/ir/cpu_quantize_placement_pass.h"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <boost/logic/tribool.hpp>
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
// Appends an op of `type` to block 0 of `prog`. `use_quantizer` is a tribool:
// a definite true/false sets the attribute, indeterminate leaves it unset.
void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs,
           boost::tribool use_quantizer) {
  auto* op = prog->MutableBlock(0)->AppendOp();

  op->SetType(type);

  // Only set the attribute when the caller passed a definite value.
  if (!boost::indeterminate(use_quantizer))
    op->SetAttr("use_quantizer", use_quantizer);

  if (type == "conv2d") {
    op->SetAttr("name", name);
    op->SetInput("Input", {inputs[0]});
    op->SetInput("Filter", {inputs[1]});
    op->SetInput("Bias", {inputs[2]});
  } else if (type == "relu") {
    op->SetInput("X", inputs);
  } else if (type == "concat") {
    op->SetAttr("axis", 1);
    op->SetInput("X", {inputs[0], inputs[1]});
  } else if (type == "pool2d") {
    op->SetInput("X", {inputs[0]});
  } else {
    FAIL() << "Unexpected operator type.";
  }
  op->SetOutput("Out", {outputs[0]});
}
|
||||
|
||||
// Builds the test program with the following ops and their use_quantizer
// attribute ("none" means the attribute is left unset):
//
// operator                     use_quantizer
// ---------------------------------------
// (a,b)->concat->c             none
// (c,weights,bias)->conv->f    false
// f->relu->g                   none
// g->pool->h                   false
// (h,weights2,bias2)->conv->k  false
// k->pool->l                   false
ProgramDesc BuildProgramDesc() {
  ProgramDesc prog;

  for (auto& v :
       std::vector<std::string>({"a", "b", "c", "weights", "bias", "f", "g",
                                 "h", "weights2", "bias2", "k", "l"})) {
    auto* var = prog.MutableBlock(0)->Var(v);
    // NOTE(review): all vars use SELECTED_ROWS — presumably the type is
    // irrelevant to the placement pass; confirm if this test is extended.
    var->SetType(proto::VarType::SELECTED_ROWS);
    if (v == "weights" || v == "bias") {
      var->SetPersistable(true);
    }
  }

  SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, boost::indeterminate);
  SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, false);
  SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, boost::indeterminate);
  SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, false);
  SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, false);
  SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, false);

  return prog;
}
|
||||
|
||||
// Runs cpu_quantize_placement_pass with the given enabled-op-types and
// excluded-op-ids attributes, then counts ops whose use_quantizer attribute
// ended up true and compares against the expectation.
void MainTest(std::initializer_list<std::string> quantize_enabled_op_types,
              std::initializer_list<int> quantize_excluded_op_ids,
              unsigned expected_use_quantizer_true_count) {
  auto prog = BuildProgramDesc();

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  auto pass = PassRegistry::Instance().Get("cpu_quantize_placement_pass");
  // NOTE(review): the pass presumably takes ownership of the raw pointers
  // handed to Set() — confirm, since they are not deleted here.
  pass->Set("quantize_enabled_op_types",
            new std::unordered_set<std::string>(quantize_enabled_op_types));
  pass->Set("quantize_excluded_op_ids",
            new std::unordered_set<int>(quantize_excluded_op_ids));

  graph = pass->Apply(std::move(graph));

  unsigned use_quantizer_true_count = 0;

  for (auto* node : graph->Nodes()) {
    if (node->IsOp()) {
      auto* op = node->Op();
      if (op->HasAttr("use_quantizer") &&
          boost::get<bool>(op->GetAttr("use_quantizer"))) {
        ++use_quantizer_true_count;
      }
    }
  }

  EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count);
}
|
||||
|
||||
// Only pool2d enabled, nothing excluded: expects both pools marked.
TEST(QuantizerPlacementPass, enabled_pool) { MainTest({"pool2d"}, {}, 2); }

// Only conv2d enabled, one op id excluded: expects a single op marked.
TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
  MainTest({"conv2d"}, {4}, 1);
}

// Empty enabled list means all types; nothing excluded.
TEST(QuantizerPlacementPass, excluded_none) {
  // 2 conv + 2 pool
  MainTest({}, {}, 4);
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
USE_PASS(cpu_quantize_placement_pass);
|
@ -0,0 +1,39 @@
|
||||
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/fluid/framework/ir/runtime_context_cache_pass.h"
|
||||
#include <memory>
|
||||
#include "paddle/fluid/framework/operator.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
std::unique_ptr<ir::Graph> RuntimeContextCachePass::ApplyImpl(
|
||||
std::unique_ptr<ir::Graph> graph) const {
|
||||
VLOG(3) << "Applies Runtime Context Cache strategy.";
|
||||
for (const Node* n : graph->Nodes()) {
|
||||
if (n->IsOp()) {
|
||||
n->Op()->SetAttr(kEnableCacheRuntimeContext, true);
|
||||
}
|
||||
}
|
||||
return graph;
|
||||
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
REGISTER_PASS(runtime_context_cache_pass,
|
||||
paddle::framework::ir::RuntimeContextCachePass);
|
@ -0,0 +1,32 @@
|
||||
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "paddle/fluid/framework/ir/pass.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
// Marks all operators in the graph so that their runtime contexts can be
// cached (sets the kEnableCacheRuntimeContext attribute on each op).
class RuntimeContextCachePass : public Pass {
 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue