commit
9adb158e5b
paddle/fluid/framework/ir/lock_free_optimize_pass.h
@@ -0,0 +1,130 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
#define PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_

#include <memory>
#include <string>
#include <vector>

#include <boost/algorithm/string/predicate.hpp>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

class Node;

/*
 * Remove the sum op that aggregates all gradients of the backward ops,
 * and remove the dependencies of the optimizer on that sum op, so that
 * each gradient is consumed by its own optimizer op.
 *
 * Before this pass:
 *
 *   forward_op1   forward_op2
 *        |             |
 *    grad_op1      grad_op2
 *         \           /
 *          \         /
 *            sum_op
 *              |
 *            sgd_op
 *
 * After this pass:
 *
 *   forward_op1   forward_op2
 *        |             |
 *    grad_op1      grad_op2
 *        |             |
 *     sgd_op1       sgd_op2
 *
 * sgd_op1 and sgd_op2 update the same weight, which occupies the same
 * memory, so we can benefit from lock-free acceleration.
 */
class LockFreeOptimizePass : public Pass {
 public:
  virtual ~LockFreeOptimizePass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;

 private:
  // Create a new sgd node via the current optimizer node
  ir::Node* CreateNewSGDNode(ir::Graph* graph, ir::Node* forward_node,
                             ir::Node* backward_node, ir::Node* grad_sum_node,
                             ir::Node* optimize_node) const;

  // Replace the input weight's optimizers
  void ReplaceUpstreamNode(ir::Node* upstream_node,
                           ir::Node* old_optimizer_node,
                           ir::Node* new_optimizer_node) const;

  // Replace the output weight's optimizers
  void ReplaceAllDownstreamNode(ir::Node* old_optimizer_node,
                                ir::Node* new_optimizer_node) const;

  // Find all weight variables in the graph
  bool FindAllWeightVars(ir::Graph* graph) const;

  // Find the forward_op node via the backward_op node
  ir::Node* FindForwardOpViaBackwardOp(ir::Graph* graph,
                                       ir::Node* backward_node) const;

  std::vector<ir::Node*> FindConnectedNode(ir::Node* upstream_node,
                                           ir::Node* downstream_node) const;

  inline bool IsOpNamed(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kOperation && node->Name() == name;
  }

  inline bool IsVarNamed(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable && node->Name() == name;
  }

  inline bool IsVarNameEndsWith(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable &&
           boost::algorithm::ends_with(node->Name(), name);
  }

  inline bool IsVarNameContains(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable &&
           node->Name().find(name) != std::string::npos;
  }

  inline bool IsControlDepFrom(ir::Node* ctrl_dep_node, ir::Node* node) const {
    PADDLE_ENFORCE(ctrl_dep_node);
    PADDLE_ENFORCE(node);

    return IsControlDepVar(*ctrl_dep_node) &&
           ctrl_dep_node->inputs.size() >= 1u &&
           ctrl_dep_node->inputs[0] == node;
  }
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle

#endif  // PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
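Note: the implementations of the helpers above are not part of this diff. As a reading aid, here is a minimal sketch of what ReplaceUpstreamNode could look like, assuming the public inputs/outputs vectors on ir::Node and an extra #include <algorithm>; the rewiring shown is illustrative, not the actual implementation:

// Sketch only: swap old_optimizer_node for new_optimizer_node in the
// output list of upstream_node, then mirror the edge on the new node.
void LockFreeOptimizePass::ReplaceUpstreamNode(
    ir::Node* upstream_node, ir::Node* old_optimizer_node,
    ir::Node* new_optimizer_node) const {
  PADDLE_ENFORCE(upstream_node);
  PADDLE_ENFORCE(old_optimizer_node);
  PADDLE_ENFORCE(new_optimizer_node);

  // Redirect upstream_node's outgoing edge to the new optimizer node.
  std::replace(upstream_node->outputs.begin(), upstream_node->outputs.end(),
               old_optimizer_node, new_optimizer_node);

  // The new optimizer node now consumes upstream_node as an input.
  new_optimizer_node->inputs.push_back(upstream_node);
}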
paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc
@@ -0,0 +1,214 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#include "paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h"
#include <string>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"

#define MAX_CONCAT_INPUTS 200

namespace paddle {
namespace framework {
namespace ir {

PDNode* BuildSeqPoolConcatPattern(PDPattern* pattern,
                                  const std::string& name_scope,
                                  int num_inputs) {
  auto is_concat_op_with_inputs = [](Node* x, int num) -> bool {
    return x && x->IsOp() && x->Op()->Type() == "concat" &&
           x->Op()->Input("X").size() == static_cast<size_t>(num);
  };

  auto is_nth_input_var_of_concat = [=](Node* x, int idx) -> bool {
    return x && x->IsVar() && VarLinksToOp(x, "concat") &&
           x->outputs.size() == 1 && IsNthInput(x, x->outputs[0], "X", idx) &&
           is_concat_op_with_inputs(x->outputs[0], num_inputs);
  };

  auto is_seqpool_op_with_pooltype_of_nth_input_of_concat =
      [=](Node* x, const std::string& type, int idx) -> bool {
    bool this_is_seqpool_op =
        x && x->IsOp() && x->Op()->Type() == "sequence_pool" &&
        x->Op()->HasAttr("pooltype") &&
        boost::get<std::string>(x->Op()->GetAttr("pooltype")) == type &&
        x->outputs.size() == 2;  // sequence_pool should have exactly 2 outputs
    bool satisfied_all = this_is_seqpool_op;
    if (this_is_seqpool_op) {
      // Only one output of seqpool_op is the nth input var of concat;
      // the other one should be an unused, empty var.
      if (is_nth_input_var_of_concat(x->outputs[0], idx)) {
        satisfied_all = satisfied_all && x->outputs[1]->IsVar() &&
                        x->outputs[1]->outputs.empty();
      } else {
        satisfied_all =
            satisfied_all && is_nth_input_var_of_concat(x->outputs[1], idx) &&
            x->outputs[0]->IsVar() && x->outputs[0]->outputs.empty();
      }
    }
    return satisfied_all;
  };

  auto* concat_op = pattern->NewNode(
      [=](Node* x) { return is_concat_op_with_inputs(x, num_inputs); },
      name_scope + "/concat_op");
  concat_op->assert_op_attr<int>("axis", 1);

  auto* concat_out_var = pattern->NewNode(
      [=](Node* x) {
        return x && x->IsVar() && VarLinksFromOp(x, "concat") &&
               x->inputs.size() == 1 &&
               is_concat_op_with_inputs(x->inputs[0], num_inputs);
      },
      name_scope + "/concat_out_var");
  concat_out_var->assert_is_only_output_of_op("concat");

  std::vector<PDNode*> seqpool_ops_input_var(num_inputs);
  std::vector<PDNode*> seqpool_ops_output_var(num_inputs);
  std::vector<PDNode*> seqpool_ops_output_unused_var(num_inputs);
  std::vector<PDNode*> seqpool_ops(num_inputs);

  for (int i = 0; i < num_inputs; ++i) {
    seqpool_ops_output_var[i] = pattern->NewNode(
        [=](Node* x) {
          return x && x->IsVar() && is_nth_input_var_of_concat(x, i) &&
                 x->inputs.size() == 1 &&
                 is_seqpool_op_with_pooltype_of_nth_input_of_concat(
                     x->inputs[0], "SUM", i);
        },
        name_scope + "/sequence_pool_out_" + std::to_string(i));

    seqpool_ops_output_unused_var[i] = pattern->NewNode(
        [=](Node* x) {
          return x && x->IsVar() && x->inputs.size() == 1 &&
                 x->outputs.empty() &&
                 is_seqpool_op_with_pooltype_of_nth_input_of_concat(
                     x->inputs[0], "SUM", i);
        },
        name_scope + "/sequence_pool_unused_out_" + std::to_string(i));

    seqpool_ops[i] = pattern->NewNode(
        [=](Node* x) {
          return x && x->IsOp() &&
                 is_seqpool_op_with_pooltype_of_nth_input_of_concat(x, "SUM",
                                                                    i);
        },
        name_scope + "/sequence_pool_op_" + std::to_string(i));

    seqpool_ops_input_var[i] = pattern->NewNode(
        [=](Node* x) {
          bool basic = x && x->IsVar() && x->outputs.size() >= 1;
          bool next_is_fine = false;
          for (auto* o : x->outputs) {
            if (is_seqpool_op_with_pooltype_of_nth_input_of_concat(o, "SUM",
                                                                   i)) {
              next_is_fine = true;
              break;
            }
          }
          return basic && next_is_fine;
        },
        name_scope + "/sequence_pool_in_" + std::to_string(i));

    // Links
    seqpool_ops[i]
        ->LinksFrom({seqpool_ops_input_var[i]})
        .LinksTo({seqpool_ops_output_var[i], seqpool_ops_output_unused_var[i]});
  }
  concat_op->LinksFrom(seqpool_ops_output_var).LinksTo({concat_out_var});
  return concat_out_var;
}

int BuildFusion(Graph* graph, const std::string& name_scope, int num_inputs) {
  GraphPatternDetector gpd;
  auto* pattern = gpd.mutable_pattern();
  BuildSeqPoolConcatPattern(pattern, name_scope, num_inputs);

  auto retrieve_node = [](const std::string& name,
                          const GraphPatternDetector::subgraph_t& subgraph,
                          const PDPattern& pat) -> Node* {
    PADDLE_ENFORCE(subgraph.count(pat.RetrieveNode(name)),
                   "pattern has no Node called %s", name.c_str());
    Node* p = subgraph.at(pat.RetrieveNode(name));
    PADDLE_ENFORCE_NOT_NULL(p, "subgraph has no node %s", name.c_str());
    return p;
  };

  int fusion_count{0};
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle SeqPool Concat fuse";
    std::vector<std::string> input_names(num_inputs);
    std::vector<Node*> input_vars(num_inputs);
    auto& fused_pattern = gpd.pattern();
    for (int i = 0; i < num_inputs; ++i) {
      input_vars[i] =
          retrieve_node(name_scope + "/sequence_pool_in_" + std::to_string(i),
                        subgraph, fused_pattern);
      input_names[i] = input_vars[i]->Name();
    }
    auto* concat_op =
        retrieve_node(name_scope + "/concat_op", subgraph, fused_pattern);
    auto* concat_out_var =
        retrieve_node(name_scope + "/concat_out_var", subgraph, fused_pattern);
    auto* seqpool_op0 = retrieve_node(name_scope + "/sequence_pool_op_0",
                                      subgraph, fused_pattern);

    // Create the new OpDesc.
    OpDesc op_desc;
    op_desc.SetType("fusion_seqpool_concat");
    op_desc.SetInput("X", input_names);
    op_desc.SetAttr("pooltype", seqpool_op0->Op()->GetAttr("pooltype"));
    op_desc.SetAttr("axis", concat_op->Op()->GetAttr("axis"));
    op_desc.SetOutput("Out", {concat_out_var->Name()});
    auto* op = graph->CreateOpNode(&op_desc);
    for (size_t i = 0; i < input_vars.size(); ++i) {
      IR_NODE_LINK_TO(input_vars[i], op);
    }
    IR_NODE_LINK_TO(op, concat_out_var);

    std::unordered_set<const Node*> marked_nodes;
    for (auto& item : subgraph) {
      marked_nodes.insert(item.second);
    }
    for (size_t i = 0; i < input_vars.size(); ++i) {
      marked_nodes.erase(input_vars[i]);
    }
    marked_nodes.erase(concat_out_var);
    GraphSafeRemoveNodes(graph, marked_nodes);
    ++fusion_count;
  };

  gpd(graph, handler);
  return fusion_count;
}

std::unique_ptr<ir::Graph> SeqPoolConcatFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  FusePassBase::Init(name_scope_, graph.get());
  int fusion_count = 0;
  for (int i = MAX_CONCAT_INPUTS; i > 0; --i) {
    fusion_count +=
        BuildFusion(graph.get(), name_scope_ + "/" + std::to_string(i), i);
  }
  AddStatis(fusion_count);

  return graph;
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(seqpool_concat_fuse_pass,
              paddle::framework::ir::SeqPoolConcatFusePass);
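For context, this is how a registered pass is applied to a graph; the snippet mirrors the tester further below (prog is assumed to be a ProgramDesc containing the sequence_pool -> concat subgraph):

// Build a graph from the program, fetch the pass by its registered name,
// and run it; Apply returns the transformed graph.
std::unique_ptr<paddle::framework::ir::Graph> graph(
    new paddle::framework::ir::Graph(prog));
auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
    "seqpool_concat_fuse_pass");
graph = pass->Apply(std::move(graph));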
paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h
@@ -0,0 +1,52 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#pragma once

#include <memory>
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

namespace paddle {
namespace framework {
namespace ir {

/**
 * Fuse SequencePool (with the SUM pooltype only, for now) and Concat.
 *
 * Before fuse:
 *      |        |           |
 *   seq_pool, seq_pool, ... seq_pool
 *       \       |    ...   /
 *             concat
 *               |
 * After fuse:
 *       \       |          /
 *      FusionSeqPoolConcat
 *               |
 */
class SeqPoolConcatFusePass : public FusePassBase {
 public:
  virtual ~SeqPoolConcatFusePass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;

  const std::string name_scope_{"seqpool_concat_fuse"};
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
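Functionally, the fused op is expected to compute the same result as the subgraph it replaces; in pseudo-code (LoD and shape handling elided, names illustrative):

// pooled[i] = sequence_pool(X[i], pooltype);  // pooltype is "SUM" here
// Out      = concat(pooled[0..n-1], axis);    // axis is 1 here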
paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc
@@ -0,0 +1,198 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h"
#include <gtest/gtest.h>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_proto_maker.h"

namespace paddle {
namespace framework {
namespace ir {

void SetOp(ProgramDesc* prog, const std::string& type,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  if (type == "sequence_pool") {
    op->SetInput("X", {inputs[0]});
    std::string pooltype = "SUM";
    op->SetAttr("pooltype", pooltype);
    op->SetOutput("MaxIndex", {outputs[0]});
    op->SetOutput("Out", {outputs[1]});
  } else if (type == "concat") {
    op->SetInput("X", inputs);
    op->SetAttr("axis", 1);
    op->SetOutput("Out", {outputs[0]});
  } else {
    op->SetInput("X", inputs);
    op->SetOutput("Out", outputs);
  }
  op->SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
              static_cast<int>(OpRole::kForward));
}

int CountOpType(const ir::Graph* graph,
                const std::string& op_type = "fusion_seqpool_concat") {
  int count = 0;
  for (auto* node : graph->Nodes()) {
    if (node->IsOp() && node->Op()->Type() == op_type) {
      ++count;
    }
  }
  return count;
}

std::unique_ptr<ir::Graph> GetNumNodesOfBeforeAfter(
    std::unique_ptr<ir::Graph> graph, int* before, int* after,
    const std::string& pass_type = "seqpool_concat_fuse_pass") {
  auto pass = PassRegistry::Instance().Get(pass_type);
  *before = graph->Nodes().size();
  graph = pass->Apply(std::move(graph));
  *after = graph->Nodes().size();
  return graph;
}

/*
 * Before fuse:
 *    a       b       c
 *    |       |       |
 *   op1     op2     op3
 *   / \     / \     / \
 *  d   e   f   g   h   i
 *       \      |      /
 *            concat
 *              |
 *              j
 * op1, op2 and op3 are sequence_pool ops with the "SUM" pooltype attr.
 *
 * After fuse:
 *      a    b    c
 *       \   |   /
 *  fusion_seqpool_concat
 *           |
 *           j
 */
TEST(SeqPoolConcatFusePass, basic) {
  ProgramDesc prog;
  for (auto& v : std::vector<std::string>(
           {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"})) {
    auto* var = prog.MutableBlock(0)->Var(v);
    var->SetType(proto::VarType::LOD_TENSOR);
  }

  SetOp(&prog, "sequence_pool", std::vector<std::string>({"a"}),
        std::vector<std::string>({"d", "e"}));
  SetOp(&prog, "sequence_pool", std::vector<std::string>({"b"}),
        std::vector<std::string>({"f", "g"}));
  SetOp(&prog, "sequence_pool", std::vector<std::string>({"c"}),
        std::vector<std::string>({"h", "i"}));
  SetOp(&prog, "concat", std::vector<std::string>({"e", "g", "i"}),
        std::vector<std::string>({"j"}));

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  int before, after;
  graph = GetNumNodesOfBeforeAfter(std::move(graph), &before, &after);
  // Remove 10 nodes: op1, op2, op3, d, e, f, g, h, i, concat_op
  // Add 1 node: fusion_seqpool_concat
  EXPECT_EQ(after, before - 9);
  EXPECT_EQ(CountOpType(graph.get()), 1);
}

/*
 * Before fuse:
 *    a          b
 *    |         / \
 *   op1     op2   op3
 *   / \     / \     \
 *  c   d   e   f     g
 *       \     /
 *       concat
 *          |
 *          h
 * op1 and op2 are sequence_pool ops with the "SUM" pooltype attr.
 *
 * After fuse:
 *      a    b
 *       \  / \
 *  fusion_seqpool_concat  op3
 *          |               |
 *          h               g
 */
TEST(SeqPoolConcatFusePass, advanced) {
  ProgramDesc prog;
  for (auto& v :
       std::vector<std::string>({"a", "b", "c", "d", "e", "f", "g", "h"})) {
    auto* var = prog.MutableBlock(0)->Var(v);
    var->SetType(proto::VarType::LOD_TENSOR);
  }

  SetOp(&prog, "sequence_pool", std::vector<std::string>({"a"}),
        std::vector<std::string>({"c", "d"}));
  SetOp(&prog, "sequence_pool", std::vector<std::string>({"b"}),
        std::vector<std::string>({"e", "f"}));
  SetOp(&prog, "op3", std::vector<std::string>({"b"}),
        std::vector<std::string>({"g"}));
  SetOp(&prog, "concat", std::vector<std::string>({"d", "f"}),
        std::vector<std::string>({"h"}));

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  int before, after;
  graph = GetNumNodesOfBeforeAfter(std::move(graph), &before, &after);
  // Remove 7 nodes: op1, op2, c, d, e, f, concat_op
  // Add 1 node: fusion_seqpool_concat
  EXPECT_EQ(after, before - 6);
  EXPECT_EQ(CountOpType(graph.get()), 1);
}

ProgramDesc BuildProgramDesc(int num_inputs_of_concat) {
  ProgramDesc prog;
  auto new_var = [&](const std::string& name) {
    auto* var = prog.MutableBlock(0)->Var(name);
    var->SetType(proto::VarType::LOD_TENSOR);
  };
  std::vector<std::string> concat_inputs;
  for (int i = 0; i < num_inputs_of_concat; ++i) {
    std::string prefix = "seqpool_op_" + std::to_string(i);
    new_var(prefix + "in");
    new_var(prefix + "out");
    new_var(prefix + "out_unused");
    SetOp(&prog, "sequence_pool", std::vector<std::string>({prefix + "in"}),
          std::vector<std::string>({prefix + "out", prefix + "out_unused"}));
    concat_inputs.push_back(prefix + "out");
  }
  SetOp(&prog, "concat", concat_inputs,
        std::vector<std::string>({"concat_out"}));
  return prog;
}

// Test more inputs of concat.
TEST(SeqPoolConcatFusePass, more_inputs) {
  for (int num : {1, 2, 10}) {
    ProgramDesc prog = BuildProgramDesc(num);
    std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
    int before, after;
    graph = GetNumNodesOfBeforeAfter(std::move(graph), &before, &after);
    // Remove num * 3 + 1 nodes: (seqpool_op, out, out_unused) per input,
    // plus concat_op; add 1 node: fusion_seqpool_concat.
    EXPECT_EQ(after, before - num * 3);
    EXPECT_EQ(CountOpType(graph.get()), 1);
  }
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

USE_PASS(seqpool_concat_fuse_pass);
paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc
@@ -0,0 +1,148 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h"

namespace paddle {
namespace framework {
namespace ir {

template <int times>
std::unique_ptr<ir::Graph> TransposeFlattenConcatFusePass<times>::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  const std::string pattern_name =
      "transpose_flatten" + std::to_string(times) + "_concat_fuse";
  FusePassBase::Init(pattern_name, graph.get());

  GraphPatternDetector gpd;
  std::vector<PDNode *> input_nodes;
  for (int i = 0; i < times; i++) {
    input_nodes.push_back(gpd.mutable_pattern()
                              ->NewNode("x" + std::to_string(i))
                              ->assert_is_op_input("transpose2", "X")
                              ->AsInput());
  }

  patterns::TransposeFlattenConcat pattern(gpd.mutable_pattern(), pattern_name);
  pattern(input_nodes, times);

  auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                     Graph *g) {
    const int kNumFields = 5;
    const int kTransOffset = 1;
    const int kTransOutOffset = 2;
    const int kFlattenOffset = 3;
    const int kFlattenOutOffset = 4;
    std::vector<Node *> nodes;

    for (int i = 0; i < times; i++) {
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("transpose" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("transpose_out" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("flatten" + std::to_string(i))));
      PADDLE_ENFORCE(
          subgraph.at(pattern.GetPDNode("flatten_out" + std::to_string(i))));
      PADDLE_ENFORCE(subgraph.at(input_nodes[i]));

      nodes.push_back(subgraph.at(input_nodes[i]));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("transpose" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("transpose_out" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("flatten" + std::to_string(i))));
      nodes.push_back(
          subgraph.at(pattern.GetPDNode("flatten_out" + std::to_string(i))));
    }

    Node *concat_op = subgraph.at(pattern.GetPDNode("concat"));
    Node *concat_out = subgraph.at(pattern.GetPDNode("concat_out"));
    std::vector<std::string> input_names;
    std::vector<int> trans_axis = boost::get<std::vector<int>>(
        nodes[kTransOffset]->Op()->GetAttr("axis"));
    int flatten_axis =
        boost::get<int>(nodes[kFlattenOffset]->Op()->GetAttr("axis"));
    int concat_axis = boost::get<int>(concat_op->Op()->GetAttr("axis"));
    std::string output_name = concat_out->Name();

    for (int i = 0; i < times; i++) {
      input_names.push_back(nodes[i * kNumFields]->Name());
    }

    framework::OpDesc new_op_desc;
    new_op_desc.SetType("fusion_transpose_flatten_concat");
    new_op_desc.SetInput("X", input_names);
    new_op_desc.SetAttr("trans_axis", trans_axis);
    new_op_desc.SetAttr("flatten_axis", flatten_axis);
    new_op_desc.SetAttr("concat_axis", concat_axis);
    new_op_desc.SetOutput("Out", {output_name});
    new_op_desc.Flush();

    // Create a new node for the fused op.
    auto *new_fused_op = graph->CreateOpNode(&new_op_desc);

    std::unordered_set<const Node *> delete_nodes;

    for (int i = 0; i < times; i++) {
      nodes[i * kNumFields]->outputs.push_back(new_fused_op);
      new_fused_op->inputs.push_back(nodes[i * kNumFields]);
      delete_nodes.insert(nodes[i * kNumFields + kTransOffset]);
      delete_nodes.insert(nodes[i * kNumFields + kTransOutOffset]);
      delete_nodes.insert(nodes[i * kNumFields + kFlattenOffset]);
      delete_nodes.insert(nodes[i * kNumFields + kFlattenOutOffset]);
    }
    delete_nodes.insert(concat_op);

    new_fused_op->outputs.push_back(concat_out);
    concat_out->inputs.push_back(new_fused_op);

    // Delete the unneeded nodes.
    GraphSafeRemoveNodes(graph.get(), delete_nodes);
  };

  gpd(graph.get(), handler);
  return graph;
}

template class TransposeFlattenConcatFusePass<1>;
template class TransposeFlattenConcatFusePass<3>;
template class TransposeFlattenConcatFusePass<4>;
template class TransposeFlattenConcatFusePass<5>;
template class TransposeFlattenConcatFusePass<6>;

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(transpose_flatten_concat_fuse_pass,
              paddle::framework::ir::TransposeFlattenConcatFusePass<1>);

REGISTER_PASS(transpose_flatten3_concat_fuse_pass,
              paddle::framework::ir::TransposeFlattenConcatFusePass<3>);

REGISTER_PASS(transpose_flatten4_concat_fuse_pass,
              paddle::framework::ir::TransposeFlattenConcatFusePass<4>);

REGISTER_PASS(transpose_flatten5_concat_fuse_pass,
              paddle::framework::ir::TransposeFlattenConcatFusePass<5>);

REGISTER_PASS(transpose_flatten6_concat_fuse_pass,
              paddle::framework::ir::TransposeFlattenConcatFusePass<6>);
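The handler above gathers the matched subgraph into a flat vector with a stride of kNumFields entries per branch; for reference, the layout per branch index i is:

// nodes layout (stride kNumFields == 5):
//   nodes[i * 5 + 0]  input var x_i
//   nodes[i * 5 + 1]  transpose op       (kTransOffset)
//   nodes[i * 5 + 2]  transpose out var  (kTransOutOffset)
//   nodes[i * 5 + 3]  flatten op         (kFlattenOffset)
//   nodes[i * 5 + 4]  flatten out var    (kFlattenOutOffset)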
paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h
@@ -0,0 +1,38 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

namespace paddle {
namespace framework {
namespace ir {

// There may be many transpose-flatten structures in a model, whose outputs
// are used as the inputs of a concat op. This pass detects that pattern.
// The `times` template parameter is the number of repetitions of the
// transpose-flatten structure feeding the concat.
template <int times>
class TransposeFlattenConcatFusePass : public FusePassBase {
 public:
  virtual ~TransposeFlattenConcatFusePass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
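Since each branch count is a separately registered pass, callers select the variant by name. A hedged sketch (the surrounding setup is illustrative, but the pass names match the REGISTER_PASS calls above):

// Pick the registered variant matching the number of transpose-flatten
// branches; only 1, 3, 4, 5 and 6 branches are registered in this commit.
std::string pass_name =
    times == 1 ? "transpose_flatten_concat_fuse_pass"
               : "transpose_flatten" + std::to_string(times) +
                     "_concat_fuse_pass";
auto pass = paddle::framework::ir::PassRegistry::Instance().Get(pass_name);
graph = pass->Apply(std::move(graph));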