Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into map_api
commit
f404046878
@ -0,0 +1,154 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
#include "paddle/fluid/framework/ir/graph_traits.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
namespace {
|
||||
|
||||
// The function keeps the graph consistent by replacing
|
||||
// a node 'from' in the set of inputs nodes
|
||||
// of the visited node by a node 'to'.
|
||||
void CorrectGraphEdges(Graph* graph, Node* from, Node* to) {
|
||||
for (auto& node : GraphTraits::DFS(*graph)) {
|
||||
auto from_in_inputs =
|
||||
std::find(std::begin(node.inputs), std::end(node.inputs), from);
|
||||
|
||||
if (from_in_inputs != std::end(node.inputs)) {
|
||||
IR_NODE_LINK_TO(to, (&node));
|
||||
|
||||
auto inputs = node.Op()->Inputs();
|
||||
|
||||
using input_type = VariableNameMap::value_type;
|
||||
|
||||
std::for_each(std::begin(inputs), std::end(inputs),
|
||||
[from, to, &node](const input_type& i) -> void {
|
||||
auto param_names = i.second;
|
||||
auto pi = std::find(std::begin(param_names),
|
||||
std::end(param_names), from->Name());
|
||||
|
||||
if (pi != std::end(param_names)) {
|
||||
node.Op()->SetInput(i.first, {to->Name()});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
using graph_ptr = std::unique_ptr<ir::Graph>;
|
||||
|
||||
graph_ptr ConvElementwiseAddMKLDNNFusePass::ApplyImpl(graph_ptr graph) const {
|
||||
FusePassBase::Init(name_scope_, graph.get());
|
||||
|
||||
GraphPatternDetector gpd;
|
||||
auto pattern = gpd.mutable_pattern();
|
||||
|
||||
patterns::Conv conv_pattern{pattern, name_scope_};
|
||||
auto conv_output = conv_pattern();
|
||||
|
||||
patterns::ElementwiseAdd elementwise_add_pattern{pattern, name_scope_};
|
||||
elementwise_add_pattern(conv_output);
|
||||
|
||||
conv_output->AsIntermediate();
|
||||
|
||||
auto conv_op_has_bias = [](const Node& conv_op) -> std::pair<bool, Node*> {
|
||||
auto bias_input_names = conv_op.Op()->Inputs();
|
||||
auto bias_it = bias_input_names.find("Bias");
|
||||
|
||||
if (bias_it != std::end(bias_input_names)) {
|
||||
bool has_bias = !bias_it->second.empty();
|
||||
|
||||
if (has_bias) {
|
||||
auto conv_bias_names = bias_it->second;
|
||||
auto conv_bias_names_it =
|
||||
std::find_if(std::begin(conv_op.inputs), std::end(conv_op.inputs),
|
||||
[&conv_bias_names](Node* n) -> bool {
|
||||
return n->Name() == conv_bias_names[0];
|
||||
});
|
||||
return std::make_pair(has_bias, *conv_bias_names_it);
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_pair(false, nullptr);
|
||||
};
|
||||
|
||||
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
|
||||
Graph* g) {
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_filter, conv_filter, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
|
||||
elementwise_add_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
|
||||
elementwise_add_pattern);
|
||||
GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
|
||||
elementwise_add_pattern);
|
||||
|
||||
if (FindFuseOption(*conv_op, *elementwise_add_op) != FUSE_MKLDNN) return;
|
||||
|
||||
OpDesc op_desc;
|
||||
op_desc.SetType("conv2d");
|
||||
|
||||
op_desc.SetInput("Input", {conv_input->Name()});
|
||||
op_desc.SetInput("Filter", {conv_filter->Name()});
|
||||
op_desc.SetInput("ResidualData", {elementwise_add_x->Name()});
|
||||
op_desc.SetOutput("Output", {conv_output->Name()});
|
||||
|
||||
bool has_bias;
|
||||
Node* conv_bias;
|
||||
|
||||
std::tie(has_bias, conv_bias) = conv_op_has_bias(*conv_op);
|
||||
|
||||
if (has_bias) {
|
||||
op_desc.SetInput("Bias", {conv_bias->Name()});
|
||||
}
|
||||
|
||||
for (const auto& attr : conv_op->Op()->GetAttrMap()) {
|
||||
op_desc.SetAttr(attr.first, attr.second);
|
||||
}
|
||||
|
||||
op_desc.SetAttr("fuse_residual_connection", true);
|
||||
|
||||
auto fused_conv_op = g->CreateOpNode(&op_desc);
|
||||
|
||||
IR_NODE_LINK_TO(conv_input, fused_conv_op);
|
||||
IR_NODE_LINK_TO(conv_filter, fused_conv_op);
|
||||
IR_NODE_LINK_TO(elementwise_add_x, fused_conv_op);
|
||||
IR_NODE_LINK_TO(fused_conv_op, conv_output);
|
||||
|
||||
if (has_bias) {
|
||||
IR_NODE_LINK_TO(conv_bias, fused_conv_op);
|
||||
}
|
||||
|
||||
CorrectGraphEdges(g, elementwise_add_out, conv_output);
|
||||
GraphSafeRemoveNodes(g, {elementwise_add_out, conv_op, elementwise_add_op});
|
||||
};
|
||||
|
||||
gpd(graph.get(), handler);
|
||||
|
||||
return graph;
|
||||
}
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
// Registers the pass under the name "conv_elementwise_add_mkldnn_fuse_pass".
REGISTER_PASS(
    conv_elementwise_add_mkldnn_fuse_pass,
    paddle::framework::ir::ConvElementwiseAddMKLDNNFusePass);
|
@ -0,0 +1,38 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
|
||||
#include "paddle/fluid/framework/ir/graph.h"
|
||||
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
class ConvElementwiseAddMKLDNNFusePass : public FusePassBase {
|
||||
public:
|
||||
virtual ~ConvElementwiseAddMKLDNNFusePass() {}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
|
||||
|
||||
const std::string name_scope_{"residual_connections_fuse_pass"};
|
||||
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -0,0 +1,247 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <string>
|
||||
|
||||
#include "paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.h"
|
||||
#include "paddle/fluid/framework/ir/graph_traits.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
namespace {
|
||||
// Number of graph nodes the tests expect the pass to remove per fusion.
constexpr int nodes_removed{3};
// Number of graph nodes the tests expect the pass to add per fusion.
constexpr int nodes_added{1};
|
||||
|
||||
// Appends an operator of the given `type` to block 0 of `prog`.
//
// The op is created with use_mkldnn = true. Every {parameter, variable} pair
// in `inputs` becomes a single-argument input, and `output` becomes the op's
// single-argument output.
void SetOp(ProgramDesc* prog, const std::string& type,
           const std::vector<std::pair<std::string, std::string>>& inputs,
           const std::pair<std::string, std::string>& output) {
  auto* op_desc = prog->MutableBlock(0)->AppendOp();
  op_desc->SetType(type);
  op_desc->SetAttr("use_mkldnn", true);

  for (auto it = inputs.begin(); it != inputs.end(); ++it) {
    op_desc->SetInput(it->first, {it->second});
  }

  op_desc->SetOutput(output.first, {output.second});
}
|
||||
|
||||
struct IsReachable {
|
||||
using func = std::function<bool(const std::string&, const std::string&)>;
|
||||
|
||||
auto operator()(const std::unique_ptr<ir::Graph>& graph) -> func {
|
||||
auto find_node = [](const std::unique_ptr<ir::Graph>& graph,
|
||||
const std::string& name) -> Node* {
|
||||
for (auto& node : GraphTraits::DFS(*graph)) {
|
||||
if (name == node.Name()) {
|
||||
return &node;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
return [&](std::string from, const std::string to) -> bool {
|
||||
if (from == to) return true;
|
||||
|
||||
std::map<std::string, bool> visited;
|
||||
|
||||
for (auto& node : GraphTraits::DFS(*graph)) {
|
||||
visited[node.Name()] = false;
|
||||
}
|
||||
|
||||
visited[from] = true;
|
||||
|
||||
std::list<std::string> queue;
|
||||
queue.push_back(from);
|
||||
|
||||
while (!queue.empty()) {
|
||||
auto cur = find_node(graph, queue.front());
|
||||
queue.pop_front();
|
||||
|
||||
if (cur == nullptr) return false;
|
||||
|
||||
for (auto n : cur->outputs) {
|
||||
if (n->Name() == to) return true;
|
||||
|
||||
if (!visited[n->Name()]) {
|
||||
visited[n->Name()] = true;
|
||||
queue.push_back(n->Name());
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Checks that the graph holds exactly one conv2d op and no elementwise_add
// op, which is the state the tests expect after the fuse pass has run.
void AssertOpsCount(const std::unique_ptr<ir::Graph>& graph) {
  int conv_count = 0;
  int elementwise_add_count = 0;

  for (auto* node : graph->Nodes()) {
    if (!node->IsOp()) continue;

    const auto& op_type = node->Op()->Type();
    if (op_type == "conv2d") {
      ++conv_count;
    } else if (op_type == "elementwise_add") {
      ++elementwise_add_count;
    }
  }

  EXPECT_EQ(conv_count, 1);
  EXPECT_EQ(elementwise_add_count, 0);
}
|
||||
|
||||
// Creates a program whose block 0 declares every listed variable as a
// LOD_TENSOR. Variables in `persistent_vars` are additionally marked as
// persistable; those in `transient_vars` are left as created.
ProgramDesc BuildProgramDesc(const std::vector<std::string>& transient_vars,
                             const std::vector<std::string>& persistent_vars) {
  ProgramDesc prog;

  auto declare_var = [&prog](const std::string& name, bool persistable) {
    auto* var_desc = prog.MutableBlock(0)->Var(name);
    var_desc->SetType(proto::VarType::LOD_TENSOR);
    if (persistable) {
      var_desc->SetPersistable(true);
    }
  };

  for (const auto& name : transient_vars) {
    declare_var(name, false);
  }

  for (const auto& name : persistent_vars) {
    declare_var(name, true);
  }

  return prog;
}
|
||||
} // namespace
|
||||
|
||||
// conv2d(Input=a, Bias=bias, Filter=weights) -> b
// elementwise_add(X=b, Y=c) -> d
// relu(X=d) -> e
// The relu op must stay reachable from `a` after the fusion.
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionWithElementwiseAddRelu) {
  auto prog =
      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});

  SetOp(&prog, "conv2d",
        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
  SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
|
||||
|
||||
// Same chain as the biased variant, but the convolution has no Bias input:
// conv2d(Input=a, Filter=weights) -> b
// elementwise_add(X=b, Y=c) -> d
// relu(X=d) -> e
TEST(ConvElementwiseAddMKLDNNFusePass,
     ConvolutionWithElementwiseAddReluNoBias) {
  auto prog = BuildProgramDesc({"a", "b", "c", "d", "e"}, {"weights"});

  SetOp(&prog, "conv2d", {{"Input", "a"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});
  SetOp(&prog, "relu", {{"X", "d"}}, {"Out", "e"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_TRUE(is_reachable(graph)("a", "relu"));

  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
|
||||
|
||||
// conv2d(Input=a, Bias=bias, Filter=weights) -> b
// elementwise_add(X=b, Y=c) -> d
// With nothing consuming `d`, the test expects `d` to be unreachable from
// `a` once the pass has run.
TEST(ConvElementwiseAddMKLDNNFusePass, ConvolutionElementwiseAdd) {
  auto prog = BuildProgramDesc({"a", "b", "c", "d"}, {"bias", "weights"});

  SetOp(&prog, "conv2d",
        {{"Input", "a"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "b"});
  SetOp(&prog, "elementwise_add", {{"X", "b"}, {"Y", "c"}}, {"Out", "d"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "d"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_FALSE(is_reachable(graph)("a", "d"));

  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
|
||||
|
||||
// sigmoid(X=a) -> b
// conv2d(Input=b, Bias=bias, Filter=weights) -> c
// elementwise_add(X=c, Y=d) -> e
// relu(X=e) -> f
// The final output `f` must stay reachable from `a` after the fusion.
TEST(ConvElementwiseAddMKLDNNFusePass, SigmoidConvolutionAddElementwiseRelu) {
  auto prog =
      BuildProgramDesc({"a", "b", "c", "d", "e", "f"}, {"bias", "weights"});

  SetOp(&prog, "sigmoid", {{"X", "a"}}, {"Out", "b"});
  SetOp(&prog, "conv2d",
        {{"Input", "b"}, {"Bias", "bias"}, {"Filter", "weights"}},
        {"Output", "c"});
  SetOp(&prog, "elementwise_add", {{"X", "c"}, {"Y", "d"}}, {"Out", "e"});
  SetOp(&prog, "relu", {{"X", "e"}}, {"Out", "f"});

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  IsReachable is_reachable;
  EXPECT_TRUE(is_reachable(graph)("a", "f"));

  auto pass =
      PassRegistry::Instance().Get("conv_elementwise_add_mkldnn_fuse_pass");

  const int nodes_before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  const int nodes_after = graph->Nodes().size();

  EXPECT_TRUE(is_reachable(graph)("a", "f"));

  EXPECT_EQ(nodes_before - nodes_removed + nodes_added, nodes_after);

  AssertOpsCount(graph);
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
// Declares that this test uses conv_elementwise_add_mkldnn_fuse_pass, the pass
// the tests above fetch from PassRegistry by name.
// NOTE(review): presumably this forces the pass registration to be linked
// into the test binary - confirm against the USE_PASS macro definition.
USE_PASS(conv_elementwise_add_mkldnn_fuse_pass);
|
@ -0,0 +1,101 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h"
|
||||
#include <string>
|
||||
#include "paddle/fluid/framework/lod_tensor.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
// Replaces every matched `sequence_conv -> elementwise_add -> relu` chain in
// `graph` with a single fusion_seqconv_eltadd_relu op.
//
// graph:      graph to rewrite in place.
// name_scope: prefix used for the pattern nodes created here.
// scope:      not read by this function; the parameter scope is taken from
//             the graph attribute kParamScopeAttr for each match instead.
//
// Returns the number of chains that were fused.
int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
  GraphPatternDetector gpd;
  auto* pattern = gpd.mutable_pattern();

  // Entry point of the pattern: a non-persistable input of sequence_conv.
  PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "X"))
                  ->assert_is_op_input("sequence_conv")
                  ->assert_var_not_persistable();
  patterns::SeqConvEltAddRelu fuse_pattern(pattern, name_scope);
  fuse_pattern(x);

  // Builds the fused op node, wires it to its inputs/outputs and returns it.
  auto fuse_creator = [&](Node* seqconv, Node* input, Node* seqconv_weight,
                          Node* eltadd_bias, Node* relu_out) {
    OpDesc op_desc;
    op_desc.SetType("fusion_seqconv_eltadd_relu");
    op_desc.SetInput("X", {input->Name()});
    op_desc.SetInput("Filter", {seqconv_weight->Name()});
    op_desc.SetInput("Bias", {eltadd_bias->Name()});
    op_desc.SetAttr("contextLength", seqconv->Op()->GetAttr("contextLength"));
    op_desc.SetAttr("contextStart", seqconv->Op()->GetAttr("contextStart"));
    op_desc.SetAttr("contextStride", seqconv->Op()->GetAttr("contextStride"));

    PADDLE_ENFORCE(graph->Has(kParamScopeAttr));
    auto* param_scope = graph->Get<Scope*>(kParamScopeAttr);

    // ColMat is an extra output of the fused op; a uniquely named variable
    // is created for it in the parameter scope.
    const std::string col_mat_name = patterns::UniqueKey("SeqConvColMat");
    op_desc.SetOutput("ColMat", {col_mat_name});
    op_desc.SetOutput("Out", {relu_out->Name()});
    param_scope->Var(col_mat_name)->GetMutable<LoDTensor>();

    auto* fused_op = graph->CreateOpNode(&op_desc);
    IR_NODE_LINK_TO(input, fused_op);
    IR_NODE_LINK_TO(seqconv_weight, fused_op);
    IR_NODE_LINK_TO(eltadd_bias, fused_op);
    IR_NODE_LINK_TO(fused_op, relu_out);
    return fused_op;
  };

  int fusion_count{0};

  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle SeqConv EltAdd Relu fuse";
    GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(eltadd, eltadd, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(eltadd_bias, eltadd_bias, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(eltadd_out, eltadd_out, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(relu, relu, fuse_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, fuse_pattern);

    Node* input = subgraph.at(x);
    fuse_creator(seqconv, input, seqconv_weight, eltadd_bias, relu_out);

    // Everything between the kept input and the kept relu output goes away.
    std::unordered_set<const Node*> obsolete_nodes(
        {seqconv, seqconv_out, eltadd, eltadd_out, relu});
    GraphSafeRemoveNodes(graph, obsolete_nodes);

    ++fusion_count;
  };

  gpd(graph, handler);

  return fusion_count;
}
|
||||
|
||||
std::unique_ptr<ir::Graph> SeqConvEltAddReluFusePass::ApplyImpl(
|
||||
std::unique_ptr<ir::Graph> graph) const {
|
||||
FusePassBase::Init(name_scope_, graph.get());
|
||||
|
||||
int fusion_count = BuildFusion(graph.get(), name_scope_, param_scope());
|
||||
AddStatis(fusion_count);
|
||||
|
||||
return graph;
|
||||
}
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
||||
|
||||
// Registers the pass under the name "seqconv_eltadd_relu_fuse_pass".
REGISTER_PASS(
    seqconv_eltadd_relu_fuse_pass,
    paddle::framework::ir::SeqConvEltAddReluFusePass);
|
@ -0,0 +1,38 @@
|
||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
|
||||
#include "paddle/fluid/framework/ir/graph.h"
|
||||
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
namespace ir {
|
||||
|
||||
class SeqConvEltAddReluFusePass : public FusePassBase {
|
||||
public:
|
||||
virtual ~SeqConvEltAddReluFusePass() {}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
|
||||
|
||||
const std::string name_scope_{"seqconv_eltadd_relu_fuse"};
|
||||
};
|
||||
|
||||
} // namespace ir
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,229 @@
|
||||
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/fluid/operators/fusion_seqconv_eltadd_relu_op.h"
|
||||
#include <algorithm> // for min, max
|
||||
#include <string>
|
||||
#include "paddle/fluid/operators/math/blas.h"
|
||||
#include "paddle/fluid/operators/math/fc_compute.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
// Validates the inputs/attributes of the fused op and derives output shapes.
//
// Requirements enforced here:
//   * inputs X, Filter, Bias and outputs Out, ColMat are present;
//   * contextStride == 1;
//   * X and Filter are 2-D;
//   * Filter height == contextLength * X width;
//   * contextLength + contextStart > 0.
//
// With X of shape (T, M) and Filter of shape (K, N) the outputs are
// Out: (T, N) and ColMat: (T, K); Out shares the LoD of X.
void FusionSeqConvEltAddReluOp::InferShape(
    framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(ctx->HasInput("X"),
                 "Input(X) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasInput("Filter"),
      "Input(Filter) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasInput("Bias"),
      "Input(Bias) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasOutput("Out"),
      "Output(Out) of FusionSeqConvEltAddReluOp should not be null.");
  PADDLE_ENFORCE(
      ctx->HasOutput("ColMat"),
      "Output(ColMat) of FusionSeqConvEltAddReluOp should not be null.");

  auto x_dims = ctx->GetInputDim("X");
  auto w_dims = ctx->GetInputDim("Filter");
  int context_length = ctx->Attrs().Get<int>("contextLength");
  PADDLE_ENFORCE(
      ctx->Attrs().Get<int>("contextStride") == 1,
      "Currently, FusionSeqConvEltAddReluOp only supports contextStride=1.");
  // The rank check must precede the indexed accesses below; it is performed
  // once (the original repeated the identical check twice).
  PADDLE_ENFORCE(x_dims.size() == 2 && w_dims.size() == 2,
                 "Input(X, Filter) should be 2-D tensor.");
  PADDLE_ENFORCE(w_dims[0] == context_length * x_dims[1],
                 "Filter's height should be context_length * "
                 "input_hidden_size .");
  PADDLE_ENFORCE_GT(context_length + ctx->Attrs().Get<int>("contextStart"), 0,
                    "contextStart size should be smaller than contextLength.");

  ctx->SetOutputDim("Out", {x_dims[0], w_dims[1]});
  ctx->SetOutputDim("ColMat", {x_dims[0], w_dims[0]});
  ctx->ShareLoD("X", "Out");
}
|
||||
|
||||
// Selects the kernel from the element type of input X and the device context
// of the execution context.
framework::OpKernelType FusionSeqConvEltAddReluOp::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  const auto* x = ctx.Input<framework::LoDTensor>("X");
  const auto data_type = framework::ToDataType(x->type());
  return framework::OpKernelType(data_type, ctx.device_context());
}
|
||||
|
||||
// Declares the inputs, outputs, attributes and documentation of the
// fusion_seqconv_eltadd_relu operator.
//
// Inputs:  X (LoDTensor, T x M), Filter (Tensor, K x N), Bias (Tensor, 1 x N).
// Outputs: Out (LoDTensor, T x N), ColMat (intermediate Tensor, T x K).
// Attrs:   contextLength (> 0), contextStart (default 0),
//          contextStride (default 1, > 0).
void FusionSeqConvEltAddReluOpMaker::Make() {
  AddInput("X",
           "(LoDTensor) the input is a LodTensor, which support "
           "variable-time length input sequence. The underlying tensor in "
           "this LoDTensor is a matrix with shape (T X M), where T is the "
           "total time steps in this mini-batch, M is the dim size of x.");
  // PaddingData only support false yet, should be ensured at pass.
  AddInput("Filter",
           "(Tensor) same as the input(Filter) of sequence conv op is a "
           "learnable parameter. "
           "This is a tensor with shape (K, N), where K is the "
           "context_length * dim size of x, N is the output feature size.");
  AddInput("Bias",
           "(Tensor) the learnable weights. shape (1, N), where N is the "
           "output feature size");
  AddOutput(
      "Out",
      "(LoDTensor) the output(Out) is a LodTensor, which support "
      "variable-time length output sequence. The underlying tensor in "
      "this LoDTensor is a matrix with shape (T, N), where, T is the "
      "total time steps in this mini-batch, N is the output feature size.");
  AddOutput("ColMat",
            "(Tensor) (T, K), where T is the "
            "total time steps in this mini-batch, K is height of Filter")
      .AsIntermediate();
  AddAttr<int>("contextLength",
               "(int) the contextLength of FusionSeqConvEltAddReluOp is the "
               "height of the convolution kernel.")
      .GreaterThan(0);
  AddAttr<int>("contextStart",
               "(int, default:0) the contextStart of FusionSeqConvEltAddReluOp "
               "represents the beginning of the convolution of the number of "
               "rows of sequence, which can be negative. The negative number "
               "means to pad contextStart time-steps of zeros or learnable "
               "parameters at the beginning of each instance. The positive "
               "number means to skip contextStart time-steps of each "
               "instance.")
      .SetDefault(0);
  AddAttr<int>(
      "contextStride",
      "(int, default:1) the contextStride of FusionSeqConvEltAddReluOp "
      "represents the stride length of convolution kernel. "
      "Currently, FusionSeqConvEltAddReluOp only supports "
      "contextStride=1.")
      .SetDefault(1)
      .GreaterThan(0);
  AddComment(R"DOC(
Fusion Sequence Conv and ElementwiseAdd Operator.
)DOC");
}
|
||||
|
||||
template <typename T>
|
||||
class FusionSeqConvEltAddReluKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
using DeviceContext = paddle::platform::CPUDeviceContext;
|
||||
auto* x = ctx.Input<LoDTensor>("X");
|
||||
auto* w = ctx.Input<Tensor>("Filter");
|
||||
auto* b = ctx.Input<Tensor>("Bias");
|
||||
auto* y = ctx.Output<LoDTensor>("Out");
|
||||
auto* col = ctx.Output<Tensor>("ColMat");
|
||||
|
||||
auto x_lod = x->lod();
|
||||
auto x_dims = x->dims();
|
||||
auto w_dims = w->dims();
|
||||
PADDLE_ENFORCE_EQ(b->numel(), w_dims[1],
|
||||
"bias size should be equal to output feature size.");
|
||||
PADDLE_ENFORCE_EQ(x_lod.size(), 1UL,
|
||||
"Only support one level sequence now.");
|
||||
|
||||
const T* x_data = x->data<T>();
|
||||
const T* w_data = w->data<T>();
|
||||
const T* b_data = b->data<T>();
|
||||
T* y_data = y->mutable_data<T>(ctx.GetPlace());
|
||||
T* col_data = col->mutable_data<T>(ctx.GetPlace());
|
||||
|
||||
int context_start = ctx.Attr<int>("contextStart");
|
||||
int context_length = ctx.Attr<int>("contextLength");
|
||||
int up_pad = std::max(0, -context_start);
|
||||
int down_pad = std::max(0, context_start + context_length - 1);
|
||||
// im2col
|
||||
int src_mat_w = static_cast<int>(x_dims[1]);
|
||||
int src_mat_w_sz = src_mat_w * sizeof(T);
|
||||
int col_mat_w = static_cast<int>(w_dims[0]);
|
||||
int col_mat_w_sz = col_mat_w * sizeof(T);
|
||||
for (int i = 0; i < static_cast<int>(x_lod[0].size()) - 1; ++i) {
|
||||
int st = x_lod[0][i];
|
||||
int ed = x_lod[0][i + 1];
|
||||
const T* src_data = x_data + st * src_mat_w;
|
||||
T* dst_data = col_data + st * col_mat_w;
|
||||
int seq_len = ed - st;
|
||||
if (seq_len > up_pad + down_pad) {
|
||||
// zero all up_pad and fill data
|
||||
std::memset(dst_data, 0, up_pad * col_mat_w_sz);
|
||||
dst_data = dst_data + up_pad * src_mat_w;
|
||||
int copy_size = col_mat_w_sz - up_pad * src_mat_w_sz;
|
||||
for (int j = 0; j < up_pad; ++j) {
|
||||
// blas.VCOPY?
|
||||
std::memcpy(dst_data, src_data, copy_size);
|
||||
dst_data += (col_mat_w - src_mat_w);
|
||||
copy_size += src_mat_w_sz;
|
||||
}
|
||||
// fill data
|
||||
for (int j = 0; j < seq_len - up_pad - down_pad; ++j) {
|
||||
std::memcpy(dst_data, src_data, copy_size);
|
||||
dst_data += col_mat_w;
|
||||
src_data += src_mat_w;
|
||||
}
|
||||
// zero all down_pad and fill data
|
||||
std::memset(dst_data, 0, down_pad * col_mat_w_sz);
|
||||
copy_size -= src_mat_w_sz;
|
||||
for (int j = 0; j < down_pad; ++j) {
|
||||
std::memcpy(dst_data, src_data, copy_size);
|
||||
dst_data += col_mat_w;
|
||||
src_data += src_mat_w;
|
||||
copy_size -= src_mat_w_sz;
|
||||
}
|
||||
} else {
|
||||
PADDLE_ENFORCE_GE(context_length, up_pad + down_pad + 1);
|
||||
std::memset(dst_data, 0, seq_len * col_mat_w_sz);
|
||||
dst_data = dst_data + up_pad * src_mat_w;
|
||||
int zero_sz = up_pad * src_mat_w_sz;
|
||||
int cur_src_sz = seq_len * src_mat_w_sz;
|
||||
for (int j = 0; j < std::min(up_pad, seq_len); ++j) {
|
||||
int copy_size = std::min(cur_src_sz, col_mat_w_sz - zero_sz);
|
||||
std::memcpy(dst_data, src_data, copy_size);
|
||||
dst_data += (col_mat_w - src_mat_w);
|
||||
zero_sz -= src_mat_w_sz;
|
||||
}
|
||||
// from bottom
|
||||
dst_data = col_data + ed * col_mat_w;
|
||||
src_data = x_data + st * src_mat_w;
|
||||
zero_sz = down_pad * src_mat_w_sz;
|
||||
for (int j = 1; j <= std::min(down_pad, seq_len); ++j) {
|
||||
int copy_size = std::min(cur_src_sz, col_mat_w_sz - zero_sz);
|
||||
std::memcpy(dst_data - (zero_sz + copy_size) / sizeof(T),
|
||||
src_data + std::max(seq_len - j - up_pad, 0) * src_mat_w,
|
||||
copy_size);
|
||||
dst_data -= col_mat_w;
|
||||
zero_sz -= src_mat_w_sz;
|
||||
}
|
||||
}
|
||||
}
|
||||
auto& dev_ctx = ctx.template device_context<DeviceContext>();
|
||||
auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
|
||||
math::FCCompute<DeviceContext, T>(blas, x_dims[0], w_dims[1], w_dims[0],
|
||||
col_data, w_data, y_data, b_data, true);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OPERATOR(fusion_seqconv_eltadd_relu, ops::FusionSeqConvEltAddReluOp,
|
||||
ops::FusionSeqConvEltAddReluOpMaker,
|
||||
paddle::framework::DefaultGradOpDescMaker<true>);
|
||||
|
||||
REGISTER_OP_CPU_KERNEL(fusion_seqconv_eltadd_relu,
|
||||
ops::FusionSeqConvEltAddReluKernel<float>,
|
||||
ops::FusionSeqConvEltAddReluKernel<double>);
|
@ -0,0 +1,42 @@
|
||||
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
#include "paddle/fluid/framework/op_registry.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
// Short aliases for the framework tensor types used by this operator.
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
|
||||
|
||||
class FusionSeqConvEltAddReluOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
void InferShape(framework::InferShapeContext* ctx) const override;
|
||||
|
||||
protected:
|
||||
framework::OpKernelType GetExpectedKernelType(
|
||||
const framework::ExecutionContext& ctx) const override;
|
||||
};
|
||||
|
||||
class FusionSeqConvEltAddReluOpMaker
|
||||
: public framework::OpProtoAndCheckerMaker {
|
||||
public:
|
||||
void Make() override;
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue