Merge pull request #13485 from tpatejko/tpatejko/capi-resnet-conv-elementwise-fusion
MKLDNN conv+elementwise_add fusion for residual connections in ResNet
commit
42aa1d409d
@ -0,0 +1,154 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
#include "paddle/fluid/framework/ir/graph_traits.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
namespace {
|
||||
|
||||
// The function keeps the graph consistent by replacing
|
||||
// a node 'from' in the set of inputs nodes
|
||||
// of the visited node by a node 'to'.
|
||||
void CorrectGraphEdges(Graph* graph, Node* from, Node* to) {
|
||||
for (auto& node : GraphTraits::DFS(*graph)) {
|
||||
auto from_in_inputs =
|
||||
std::find(std::begin(node.inputs), std::end(node.inputs), from);
|
||||
|
||||
if (from_in_inputs != std::end(node.inputs)) {
|
||||
IR_NODE_LINK_TO(to, (&node));
|
||||
|
||||
auto inputs = node.Op()->Inputs();
|
||||
|
||||
using input_type = VariableNameMap::value_type;
|
||||
|
||||
std::for_each(std::begin(inputs), std::end(inputs),
|
||||
[from, to, &node](const input_type& i) -> void {
|
||||
auto param_names = i.second;
|
||||
auto pi = std::find(std::begin(param_names),
|
||||
std::end(param_names), from->Name());
|
||||
|
||||
if (pi != std::end(param_names)) {
|
||||
node.Op()->SetInput(i.first, {to->Name()});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
using graph_ptr = std::unique_ptr<ir::Graph>;
|
||||
|
||||
graph_ptr ConvElementwiseAddMKLDNNFusePass::ApplyImpl(graph_ptr graph) const {
|
||||
FusePassBase::Init(name_scope_, graph.get());
|
||||
|
||||
GraphPatternDetector gpd;
|
||||
auto pattern = gpd.mutable_pattern();
|
||||
|
||||
patterns::Conv conv_pattern{pattern, name_scope_};
|
||||
auto conv_output = conv_pattern();
|
||||
|
||||
patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_};
|
||||
elementwise_add_pattern(conv_output);
|
||||
|
||||
conv_output->AsIntermediate();
|
||||
|
||||
auto conv_op_has_bias = [](const Node& conv_op) -> std::pair<bool, Node*> {
|
||||
auto bias_input_names = conv_op.Op()->Inputs();
|
||||
auto bias_it = bias_input_names.find("Bias");
|
||||
|
||||
if (bias_it != std::end(bias_input_names)) {
|
||||
bool has_bias = !bias_it->second.empty();
|
||||
|
||||
if (has_bias) {
|
||||
auto conv_bias_names = bias_it->second;
|
||||
auto conv_bias_names_it =
|
||||
std::find_if(std::begin(conv_op.inputs), std::end(conv_op.inputs),
|
||||
[&conv_bias_names](Node* n) -> bool {
|
||||
return n->Name() == conv_bias_names[0];
|
||||
});
|
||||
return std::make_pair(has_bias, *conv_bias_names_it);
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_pair(false, nullptr);
|
||||
};
|
||||
|
||||
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
|
||||
Graph* g) {
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
|
||||
elementwise_add_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
|
||||
elementwise_add_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
|
||||
elementwise_add_pattern);
|
||||
|
||||
if (FindFuseOption(*conv_op, *elementwise_add_op) != FUSE_MKLDNN) return;
|
||||
|
||||
OpDesc op_desc;
|
||||
op_desc.SetType("conv2d");
|
||||
|
||||
op_desc.SetInput("Input", {conv_input->Name()});
|
||||
op_desc.SetInput("Filter", {conv_filter->Name()});
|
||||
op_desc.SetInput("ResidualData", {elementwise_add_x->Name()});
|
||||
op_desc.SetOutput("Output", {conv_output->Name()});
|
||||
|
||||
bool has_bias;
|
||||
Node* conv_bias;
|
||||
|
||||
std::tie(has_bias, conv_bias) = conv_op_has_bias(*conv_op);
|
||||
|
||||
if (has_bias) {
|
||||
op_desc.SetInput("Bias", {conv_bias->Name()});
|
||||
}
|
||||
|
||||
for (const auto& attr : conv_op->Op()->GetAttrMap()) {
|
||||
op_desc.SetAttr(attr.first, attr.second);
|
||||
}
|
||||
|
||||
op_desc.SetAttr("fuse_residual_connection", true);
|
||||
|
||||
auto fused_conv_op = g->CreateOpNode(&op_desc);
|
||||
|
||||
IR_NODE_LINK_TO(conv_input, fused_conv_op);
|
||||
IR_NODE_LINK_TO(conv_filter, fused_conv_op);
|
||||
IR_NODE_LINK_TO(elementwise_add_x, fused_conv_op);
|
||||
IR_NODE_LINK_TO(fused_conv_op, conv_output);
|
||||
|
||||
if (has_bias) {
|
||||
IR_NODE_LINK_TO(conv_bias, fused_conv_op);
|
||||
}
|
||||
|
||||
CorrectGraphEdges(g, elementwise_add_out, conv_output);
|
||||
GraphSafeRemoveNodes(g, {elementwise_add_out, conv_op, elementwise_add_op});
|
||||
};
|
||||
|
||||
gpd(graph.get(), handler);
|
||||
|
||||
return graph;
|
||||
}
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
// Registers ConvElementwiseAddMKLDNNFusePass under the name
// "conv_elementwise_add_mkldnn_fuse_pass"; the unit tests obtain the pass by
// this name through PassRegistry::Instance().Get().
REGISTER_PASS(conv_elementwise_add_mkldnn_fuse_pass,
|
||||
paddle::framework::ir::ConvElementwiseAddMKLDNNFusePass);
|
@ -0,0 +1,38 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
|
||||
#include "paddle/fluid/framework/ir/graph.h"
|
||||
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
class ConvElementwiseAddMKLDNNFusePass : public FusePassBase {
|
||||
public:
|
||||
virtual ~ConvElementwiseAddMKLDNNFusePass() {}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
|
||||
|
||||
const std::string name_scope_{"residual_connections_fuse_pass"};
|
||||
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -0,0 +1,247 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <string>
|
||||
|
||||
#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
|
||||
#include "paddle/fluid/framework/ir/graph_traits.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
namespace {
|
||||
// Graph nodes the pass deletes per fused subgraph: the elementwise_add
// output variable, the original conv op and the elementwise_add op.
constexpr int nodes_removed = 3;
|
||||
// Graph nodes the pass creates per fused subgraph: the fused conv op.
constexpr int nodes_added = 1;
|
||||
|
||||
// Appends an operator of the given `type` to block 0 of `prog`.
//
// The operator is marked with use_mkldnn = true. Each entry of `inputs` is a
// (parameter name, variable name) pair bound as a single-variable input, and
// `output` is the (parameter name, variable name) pair of the only output.
void SetOp(ProgramDesc* prog, const std::string& type,
           const std::vector<std::pair<std::string, std::string>>& inputs,
           const std::pair<std::string, std::string>& output) {
  auto* new_op = prog->MutableBlock(0)->AppendOp();

  new_op->SetType(type);
  new_op->SetAttr("use_mkldnn", true);

  for (auto it = inputs.begin(); it != inputs.end(); ++it) {
    const std::string& param_name = it->first;
    const std::string& var_name = it->second;
    new_op->SetInput(param_name, {var_name});
  }

  new_op->SetOutput(output.first, {output.second});
}
|
||||
|
||||
struct IsReachable {
|
||||
using func = std::function<bool(const std::string&, const std::string&)>;
|
||||
|
||||
auto operator()(const std::unique_ptr<ir::Graph>& graph) -> func {
|
||||
auto find_node = [](const std::unique_ptr<ir::Graph>& graph,
|
||||
const std::string& name) -> Node* {
|
||||
for (auto& node : GraphTraits::DFS(*graph)) {
|
||||
if (name == node.Name()) {
|
||||
return &node;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
return [&](std::string from, const std::string to) -> bool {
|
||||
if (from == to) return true;
|
||||
|
||||
std::map<std::string, bool> visited;
|
||||
|
||||
for (auto& node : GraphTraits::DFS(*graph)) {
|
||||
visited[node.Name()] = false;
|
||||
}
|
||||
|
||||
visited[from] = true;
|
||||
|
||||
std::list<std::string> queue;
|
||||
queue.push_back(from);
|
||||
|
||||
while (!queue.empty()) {
|
||||
auto cur = find_node(graph, queue.front());
|
||||
queue.pop_front();
|
||||
|
||||
if (cur == nullptr) return false;
|
||||
|
||||
for (auto n : cur->outputs) {
|
||||
if (n->Name() == to) return true;
|
||||
|
||||
if (!visited[n->Name()]) {
|
||||
visited[n->Name()] = true;
|
||||
queue.push_back(n->Name());
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Checks the operator mix of a fused graph: exactly one "conv2d" op and no
// "elementwise_add" op may remain.
void AssertOpsCount(const std::unique_ptr<ir::Graph>& graph) {
  int conv_count = 0;
  int elementwise_add_count = 0;

  for (auto* node : graph->Nodes()) {
    // Variable nodes carry no op description; only op nodes are counted.
    if (!node->IsOp()) continue;

    const auto& op_type = node->Op()->Type();
    if (op_type == "conv2d") {
      ++conv_count;
    }
    if (op_type == "elementwise_add") {
      ++elementwise_add_count;
    }
  }

  EXPECT_EQ(conv_count, 1);
  EXPECT_EQ(elementwise_add_count, 0);
}
|
||||
|
||||
// Creates a program whose block 0 declares one LOD_TENSOR variable per given
// name. Variables listed in `persistent_vars` are additionally flagged as
// persistable; those in `transient_vars` are left with their default flag.
ProgramDesc BuildProgramDesc(const std::vector<std::string>& transient_vars,
                             const std::vector<std::string>& persistent_vars) {
  ProgramDesc prog;

  // Declares `var_name` in block 0 as a LOD_TENSOR and returns its descriptor.
  const auto declare_tensor = [&prog](const std::string& var_name) -> VarDesc* {
    VarDesc* desc = prog.MutableBlock(0)->Var(var_name);
    desc->SetType(proto::VarType::LOD_TENSOR);
    return desc;
  };

  for (const auto& name : transient_vars) {
    declare_tensor(name);
  }

  for (const auto& name : persistent_vars) {
    declare_tensor(name)->SetPersistable(true);
  }

  return prog;
}
|
||||
} // namespace
|
||||
|
||||
// conv2d (with bias) -> elementwise_add -> relu.
// After the pass the relu must still be reachable from the conv input, the
// node count must change by -nodes_removed +nodes_added, and only the fused
// conv2d op may remain.
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionWithElementwiseAddRelu) {
  ProgramDesc program =
      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});

  SetOp(&program, "conv2d",
        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&program, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
  SetOp(&program, "relu", {{"X", "d"}}, {"Out", "e"});

  std::unique_ptr<ir::Graph> graph{new ir::Graph(program)};

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  auto fuse_pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int original_nodes_num = static_cast<int>(graph->Nodes().size());
  graph = fuse_pass->Apply(std::move(graph));
  const int current_nodes_num = static_cast<int>(graph->Nodes().size());

  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  EXPECT_EQ(original_nodes_num - nodes_removed + nodes_added,
            current_nodes_num);

  AssertOpsCount(graph);
}
|
||||
|
||||
// Same topology as the biased case, but the conv2d op has no "Bias" input:
// conv2d -> elementwise_add -> relu.
TEST(ConvElementwiseAddMKLDNNFusePass,
     ConvolutionWithElementwiseAddReluNoBias) {
  ProgramDesc program = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});

  SetOp(&program, "conv2d", {{"Input", "a"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&program, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
  SetOp(&program, "relu", {{"X", "d"}}, {"Out", "e"});

  std::unique_ptr<ir::Graph> graph{new ir::Graph(program)};

  IsReachable is_reachable;

  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  auto fuse_pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int original_nodes_num = static_cast<int>(graph->Nodes().size());
  graph = fuse_pass->Apply(std::move(graph));
  const int current_nodes_num = static_cast<int>(graph->Nodes().size());

  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  EXPECT_EQ(original_nodes_num - nodes_removed + nodes_added,
            current_nodes_num);

  AssertOpsCount(graph);
}
|
||||
|
||||
// conv2d (with bias) -> elementwise_add with no consumer of the sum.
// The test expects the addition output "d" to be unreachable from "a" once
// the pass has run.
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionElementwiseAdd) {
  ProgramDesc program =
      BuildProgramDesc({"a", "b", "c", "d"}, {"bias", "weights"});

  SetOp(&program, "conv2d",
        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&program, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});

  std::unique_ptr<ir::Graph> graph{new ir::Graph(program)};

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "d"));

  auto fuse_pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int original_nodes_num = static_cast<int>(graph->Nodes().size());
  graph = fuse_pass->Apply(std::move(graph));
  const int current_nodes_num = static_cast<int>(graph->Nodes().size());

  EXPECT_FALSE(is_reachable(graph)("a", "d"));

  EXPECT_EQ(original_nodes_num - nodes_removed + nodes_added,
            current_nodes_num);
  AssertOpsCount(graph);
}
|
||||
|
||||
// sigmoid -> conv2d (with bias) -> elementwise_add -> relu.
// The conv input is itself produced by another op; the final output "f" must
// stay reachable from "a" after the fusion.
TEST(ConvElementwiseAddMKLDNNFusePass, SigmoidConvolutionAddElementwiseRelu) {
  ProgramDesc program =
      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});

  SetOp(&program, "sigmoid", {{"X", "a"}}, {"Out", "b"});
  SetOp(&program, "conv2d",
        {{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "c"});
  SetOp(&program, "elementwise_add", {{"X", "c"}, {"Y", "d"}}, {"Out", "e"});
  SetOp(&program, "relu", {{"X", "e"}}, {"Out", "f"});

  std::unique_ptr<ir::Graph> graph{new ir::Graph(program)};

  IsReachable is_reachable;

  EXPECT_TRUE(is_reachable(graph)("a", "f"));

  auto fuse_pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int original_nodes_num = static_cast<int>(graph->Nodes().size());
  graph = fuse_pass->Apply(std::move(graph));
  const int current_nodes_num = static_cast<int>(graph->Nodes().size());

  EXPECT_TRUE(is_reachable(graph)("a", "f"));

  EXPECT_EQ(original_nodes_num - nodes_removed + nodes_added,
            current_nodes_num);
  AssertOpsCount(graph);
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
// NOTE(review): presumably makes the registration of
// conv_elementwise_add_mkldnn_fuse_pass visible to this test binary so the
// PassRegistry lookups above succeed — confirm against the USE_PASS macro.
USE_PASS(conv_elementwise_add_mkldnn_fuse_pass);
|
Loading…
Reference in new issue