add more skip strategy

revert-15296-async_double_buffered_py_reader
dzhwinter 6 years ago
parent 2739096eec
commit d6d3e6afe2

@ -15,6 +15,7 @@
#include "paddle/fluid/framework/details/graph_print_pass.h" #include "paddle/fluid/framework/details/graph_print_pass.h"
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
@ -54,6 +55,11 @@ class GraphvizOp : public GraphvizNode {
} }
} }
// Appends one caller-supplied line to this op's buffered Graphviz text.
// `cb` is called exactly once with no arguments; its result is streamed into
// `stream_` and terminated with a newline.
template <typename Callback>
void AddCustomEdge(const Callback& cb) {
  // '\n' instead of std::endl: `stream_` is an in-memory string stream, so the
  // flush performed by std::endl does nothing useful.
  stream_ << cb() << '\n';
}
private: private:
std::ostringstream stream_; std::ostringstream stream_;
}; };
@ -68,12 +74,47 @@ std::vector<T*> FilterByNodeWrapper(const Container& con) {
return ret; return ret;
} }
// bool DetectCircleRecursive(const std::map<ir::Node*,
// std::unordered_set<ir::Node*>>, std::unordered_set<ir::Node*>* visited,
// std::unordered_set<ir::Node*> *in_trace, std::vector<std::vector<ir::Node*>>*
// circles) {
// if (visited->find(node) == visited->end()) {
// visited->insert(node);
// in_trace->insert(node);
// for (ir::Node *in : adj_list.at(node)) {
// if (visited->find(in) == visited->end() &&
// HasCircleHelper(in, adj_list, visited, in_trace)) {
// return true;
// } else if (in_trace->find(in) != in_trace->end()) {
// circles->push_back(in_trace);
// return true;
// }
// }
// }
// in_trace->erase(node);
// return false;
// }
// bool DetectCircle(const std::map<ir::Node*, std::unordered_set<ir::Node*>>&
// adj_list, std::vector<std::vector<ir::Node*>>* circles) {
// std::unordered_set<ir::Node *> visited;
// std::unordered_set<ir::Node *> in_trace;
// bool has_circle = false;
// for(auto& adj : adj_list) {
// has_circle &= DetectCircleRecursive(adj, adj_list,&visited, &in_trace,
// circles);
// }
// return has_circle;
// }
std::unordered_map<ir::Node*, int> SSAGraphPrinterImpl::ToGraphvizNode( std::unordered_map<ir::Node*, int> SSAGraphPrinterImpl::ToGraphvizNode(
const ir::Graph& graph) const { const ir::Graph& graph) const {
// Convert to GraphvizNode format // Convert to GraphvizNode format
auto& graphviz_nodes = graph.Get<GraphvizNodes>(kGraphviz); auto& graphviz_nodes = graph.Get<GraphvizNodes>(kGraphviz);
graphviz_nodes.clear(); graphviz_nodes.clear();
std::unordered_map<ir::Node*, int> vars; std::unordered_map<ir::Node*, int> vars;
std::unordered_map<ir::Node*, GraphvizOp*> ops;
int var_id = 0; int var_id = 0;
int op_id = 0; int op_id = 0;
for (auto& node : graph.Nodes()) { for (auto& node : graph.Nodes()) {
@ -81,11 +122,33 @@ std::unordered_map<ir::Node*, int> SSAGraphPrinterImpl::ToGraphvizNode(
graphviz_nodes.emplace(new GraphvizVar(node, var_id)); graphviz_nodes.emplace(new GraphvizVar(node, var_id));
vars.emplace(std::make_pair(node, var_id++)); vars.emplace(std::make_pair(node, var_id++));
} else if (node->IsOp()) { } else if (node->IsOp()) {
graphviz_nodes.emplace(new GraphvizOp(node, op_id++)); std::unique_ptr<GraphvizOp> op(new GraphvizOp(node, op_id++));
ops[node] = op.get();
graphviz_nodes.emplace(std::move(op));
// graphviz_nodes.emplace(new GraphvizOp(node, op_id++));
// ops.emplace(std::make_pair(node, graphviz_nodes.back().get()));
} else { } else {
PADDLE_THROW("Unknown op type"); PADDLE_THROW("Unknown op type");
} }
} }
// Detect circles and draw every circle edge with a distinct style.
std::vector<std::vector<ir::Node*>> circles;
const std::string kCircleEdge = "[color=red,penwidth=3.0]";
if (ir::FindCircleSubGraph(graph, &circles)) {
  VLOG(3) << "Graph has circle! circles count : " << circles.size();
  for (const auto& circle : circles) {
    // `i + 1 < size()` rather than `i < size() - 1`: size() is unsigned, so
    // the subtraction would wrap around for an empty circle.
    for (size_t i = 0; i + 1 < circle.size(); ++i) {
      // Use find(): operator[] would silently insert a null entry for a node
      // that was never registered as an op, and dereferencing it would crash.
      const auto prev_it = ops.find(circle[i]);
      const auto next_it = ops.find(circle[i + 1]);
      if (prev_it == ops.end() || next_it == ops.end()) {
        continue;  // nothing to draw for an endpoint without a GraphvizOp
      }
      GraphvizOp* prev = prev_it->second;
      const GraphvizOp* next = next_it->second;
      const std::string prev_op = "op_" + std::to_string(prev->Id());
      const std::string next_op = "op_" + std::to_string(next->Id());
      // The callback runs immediately inside AddCustomEdge, so capturing the
      // locals by reference is safe here.
      prev->AddCustomEdge([&]() -> std::string {
        return prev_op + "->" + next_op + kCircleEdge;
      });
    }
  }
}
return vars; return vars;
} }

@ -31,6 +31,8 @@ class GraphvizNode {
GraphvizNode(ir::Node* n, const int& i) : node_(n), id_(i) {} GraphvizNode(ir::Node* n, const int& i) : node_(n), id_(i) {}
virtual ~GraphvizNode() = default; virtual ~GraphvizNode() = default;
// Returns the integer id this node was constructed with.
int Id() const {
  return id_;
}
protected: protected:
ir::Node* node_; ir::Node* node_;
int id_; int id_;

@ -19,6 +19,9 @@ REGISTER_OPERATOR(sum, paddle::framework::DummyOp,
paddle::framework::SumOpMaker); paddle::framework::SumOpMaker);
REGISTER_OPERATOR(split, paddle::framework::DummyOp, REGISTER_OPERATOR(split, paddle::framework::DummyOp,
paddle::framework::SplitOpMaker); paddle::framework::SplitOpMaker);
// Register an operator named "assign" backed by DummyOp for this test file;
// the test program built in FillProgramDesc appends an op of type "assign".
REGISTER_OPERATOR(assign, paddle::framework::DummyOp,
paddle::framework::AssignOpMaker,
paddle::framework::DummyVarTypeInference);
/* /*
a @ b a @ b
@ -54,6 +57,12 @@ inline static ProgramDesc FillProgramDesc() {
op->SetInput("X", {"d", "e"}); op->SetInput("X", {"d", "e"});
op->SetOutput("Out", {"d"}); op->SetOutput("Out", {"d"});
} }
{
auto* op = prog.MutableBlock(0)->AppendOp();
op->SetType("assign");
op->SetInput("X", {"d"});
op->SetOutput("Out", {"d"});
}
return prog; return prog;
} }
@ -74,6 +83,108 @@ TEST(SSAGraphPrinter, Normal) {
printer->Print(*graph, *fout); printer->Print(*graph, *fout);
} }
using ir::Graph;
using ir::Node;
// Fills `g` with the smallest possible loop: a single operation ("op1") that
// both produces and consumes the same variable ("var1").
void BuildCircleGraph(Graph* g) {
  ir::Node* op = g->CreateEmptyNode("op1", Node::Type::kOperation);
  ir::Node* var = g->CreateEmptyNode("var1", Node::Type::kVariable);

  // op1 -> var1
  op->outputs.push_back(var);
  var->inputs.push_back(op);

  // var1 -> op1, which closes the loop
  var->outputs.push_back(op);
  op->inputs.push_back(var);
}
// Fills `g` with a two-operation loop: op1 -> var1 -> op2 -> var2 -> op1.
void BuildCircleGraph2(Graph* g) {
  ir::Node* op1 = g->CreateEmptyNode("op1", Node::Type::kOperation);
  ir::Node* op2 = g->CreateEmptyNode("op2", Node::Type::kOperation);
  ir::Node* var1 = g->CreateEmptyNode("var1", Node::Type::kVariable);
  ir::Node* var2 = g->CreateEmptyNode("var2", Node::Type::kVariable);

  // op --> var: record `var` as an output of `op`.
  auto produce = [](ir::Node* op, ir::Node* var) {
    op->outputs.push_back(var);
    var->inputs.push_back(op);
  };
  // var --> op: record `var` as an input of `op`.
  auto consume = [](ir::Node* var, ir::Node* op) {
    var->outputs.push_back(op);
    op->inputs.push_back(var);
  };

  // op1 -> var1 -> op2
  produce(op1, var1);
  consume(var1, op2);

  // op2 -> var2 -> op1
  produce(op2, var2);
  consume(var2, op1);
}
// Fills `g` with five operations and four variables wired as listed in the
// comments below.
// NOTE(review): despite the function name, the last edge (o2->v3->o1) combined
// with o1->v1->o2 forms a loop between op1 and op2 -- confirm this is intended.
void BuildNoCircleGraph(Graph* g) {
  ir::Node* op1 = g->CreateEmptyNode("op1", Node::Type::kOperation);
  ir::Node* op2 = g->CreateEmptyNode("op2", Node::Type::kOperation);
  ir::Node* op3 = g->CreateEmptyNode("op3", Node::Type::kOperation);
  ir::Node* op4 = g->CreateEmptyNode("op4", Node::Type::kOperation);
  ir::Node* op5 = g->CreateEmptyNode("op5", Node::Type::kOperation);
  ir::Node* var1 = g->CreateEmptyNode("var1", Node::Type::kVariable);
  ir::Node* var2 = g->CreateEmptyNode("var2", Node::Type::kVariable);
  ir::Node* var3 = g->CreateEmptyNode("var3", Node::Type::kVariable);
  ir::Node* var4 = g->CreateEmptyNode("var4", Node::Type::kVariable);

  // op --> var: record `var` as an output of `op`.
  auto produce = [](ir::Node* op, ir::Node* var) {
    op->outputs.push_back(var);
    var->inputs.push_back(op);
  };
  // var --> op: record `var` as an input of `op`.
  auto consume = [](ir::Node* var, ir::Node* op) {
    var->outputs.push_back(op);
    op->inputs.push_back(var);
  };

  // o1->v1->o2
  produce(op1, var1);
  consume(var1, op2);

  // o2->v2->o3
  // o2->v2->o4
  produce(op2, var2);
  consume(var2, op3);
  consume(var2, op4);

  // o2->v3->o5
  produce(op2, var3);
  consume(var3, op5);

  // o3->v4->o5
  produce(op3, var4);
  consume(var4, op5);

  // o2->v3->o1
  consume(var3, op1);
}
// Builds the one-op loop graph, checks that it is reported as having a circle,
// and prints it through SSAGraphPrinterImpl into a text file.
TEST(SSAGraphPrinter, SimpleCircle) {
  ProgramDesc program;
  Graph circle_graph(program);
  BuildCircleGraph(&circle_graph);
  ASSERT_TRUE(HasCircle(circle_graph));

  // The printer reads the kGraphviz attribute, so attach it before printing.
  circle_graph.Set<GraphvizNodes>(kGraphviz, new GraphvizNodes);
  std::unique_ptr<SSAGraphPrinter> graph_printer(new SSAGraphPrinterImpl);

  // Send the debug graph to a file.
  constexpr char kOutputPath[] = "graph_print_pass_simple_circle.txt";
  std::unique_ptr<std::ostream> fout(new std::ofstream(kOutputPath));
  PADDLE_ENFORCE(fout->good());
  graph_printer->Print(circle_graph, *fout);
}
// Builds the two-op loop graph, checks that it is reported as having a circle,
// and prints it through SSAGraphPrinterImpl into a text file.
TEST(SSAGraphPrinter, ComplexCircle) {
  ProgramDesc program;
  Graph circle_graph(program);
  BuildCircleGraph2(&circle_graph);
  ASSERT_TRUE(HasCircle(circle_graph));

  // The printer reads the kGraphviz attribute, so attach it before printing.
  circle_graph.Set<GraphvizNodes>(kGraphviz, new GraphvizNodes);
  std::unique_ptr<SSAGraphPrinter> graph_printer(new SSAGraphPrinterImpl);

  // Send the debug graph to a file.
  constexpr char kOutputPath[] = "graph_print_pass_complex_circle.txt";
  std::unique_ptr<std::ostream> fout(new std::ofstream(kOutputPath));
  PADDLE_ENFORCE(fout->good());
  graph_printer->Print(circle_graph, *fout);
}
} // namespace details } // namespace details
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

File diff suppressed because it is too large Load Diff

@ -2,7 +2,7 @@
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
@ -15,6 +15,7 @@
#pragma once #pragma once
#include <map> #include <map>
#include <string> #include <string>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/framework/details/memory_optimize_helper.h" #include "paddle/fluid/framework/details/memory_optimize_helper.h"
@ -40,10 +41,20 @@ class GraphView {
bool OutConnectInputByCtrlVar(ir::Node* in_var, ir::Node* out_var); bool OutConnectInputByCtrlVar(ir::Node* in_var, ir::Node* out_var);
// Will be deprecated in the future.
// NOTE(dzhwinter) : Python memory optimization reuses
// memory based on variable names, so different op outputs may
// have the same variable name. Enabling inplace on such a node
// will generate a circle in the SSA graph.
bool ReusedInPythonMemOpt(const std::string& var) const;
private: private:
std::vector<ir::Node*> ops_; std::vector<ir::Node*> ops_;
std::unordered_set<std::string> dup_nodes_; // mem opt affect nodes
std::map<ir::Node*, std::unordered_set<ir::Node*>> adj_list_;
}; };
// Maps a graph node to a list of graph nodes.
// NOTE(review): presumably the nodes touched by a tentative inplace rewrite,
// keyed by the original node -- confirm against TryInplaceModifyVar.
using SSANodeVector = std::unordered_map<ir::Node*, std::vector<ir::Node*>>;
class InplacePass : public ir::Pass { class InplacePass : public ir::Pass {
public: public:
InplacePass(); InplacePass();
@ -58,6 +69,15 @@ class InplacePass : public ir::Pass {
void InplaceModifyVar(const std::string& in_var, const std::string& out_var, void InplaceModifyVar(const std::string& in_var, const std::string& out_var,
const size_t& idx, ir::Graph* graph) const; const size_t& idx, ir::Graph* graph) const;
const SSANodeVector TryInplaceModifyVar(const std::string& var,
const std::string& cache_var,
const size_t& idx,
ir::Graph* graph) const;
void CommitModify(const SSANodeVector&, ir::Graph* graph) const;
void WithDrawModify(const SSANodeVector& nodes, ir::Graph* graph) const;
void InplaceModifyDesc(const std::string& in_var, const std::string& out_var, void InplaceModifyDesc(const std::string& in_var, const std::string& out_var,
const size_t& idx) const; const size_t& idx) const;

@ -52,16 +52,29 @@ bool HasCircleHelper(
ir::Node *node, ir::Node *node,
const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list, const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
std::unordered_set<ir::Node *> *visited, std::unordered_set<ir::Node *> *visited,
std::unordered_set<ir::Node *> *in_trace) { std::unordered_set<ir::Node *> *in_trace,
std::vector<std::vector<ir::Node *>> *circles) {
if (visited->find(node) == visited->end()) { if (visited->find(node) == visited->end()) {
visited->insert(node); visited->insert(node);
in_trace->insert(node); in_trace->insert(node);
for (ir::Node *in : adj_list.at(node)) { for (ir::Node *in : adj_list.at(node)) {
if (visited->find(in) == visited->end() && if (visited->find(in) == visited->end() &&
HasCircleHelper(in, adj_list, visited, in_trace)) { HasCircleHelper(in, adj_list, visited, in_trace, circles)) {
return true; return true;
} else if (in_trace->find(in) != in_trace->end()) { } else if (in_trace->find(in) != in_trace->end()) {
// Record the circle only when the caller supplied an output vector
// (HasCircle passes nullptr and just wants the boolean answer).
if (circles != nullptr) {
std::vector<ir::Node *> circle;
// The recorded circle starts at `in`, the node found to be on the trace.
circle.emplace_back(in);
ir::Node *p = in;
// Append each successor of `in` that is currently in `in_trace`.
// NOTE(review): `p` is reassigned inside the loop, but the range expression
// `adj_list.at(p)` is evaluated only once before iteration starts, so only
// the direct successors of `in` are visited -- confirm this is intended.
for (auto &adj : adj_list.at(p)) {
if (in_trace->count(adj)) {
circle.emplace_back(adj);
p = adj;
}
}
// A copy of the collected nodes is stored in the caller's vector.
circles->emplace_back(circle);
}
return true; return true;
} }
} }
@ -71,11 +84,12 @@ bool HasCircleHelper(
} }
bool HasCircleInternal( bool HasCircleInternal(
const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list) { const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
std::vector<std::vector<ir::Node *>> *circles) {
std::unordered_set<ir::Node *> visited; std::unordered_set<ir::Node *> visited;
std::unordered_set<ir::Node *> in_trace; std::unordered_set<ir::Node *> in_trace;
for (auto &adj : adj_list) { for (auto &adj : adj_list) {
if (HasCircleHelper(adj.first, adj_list, &visited, &in_trace)) { if (HasCircleHelper(adj.first, adj_list, &visited, &in_trace, circles)) {
return true; return true;
} }
} }
@ -84,13 +98,18 @@ bool HasCircleInternal(
} // namespace } // namespace
bool HasCircle(const Graph &graph) { bool HasCircle(const Graph &graph) {
return HasCircleInternal(BuildOperationAdjList(graph)); return HasCircleInternal(BuildOperationAdjList(graph), nullptr);
}
bool FindCircleSubGraph(const Graph &graph,
std::vector<std::vector<ir::Node *>> *circles) {
return HasCircleInternal(BuildOperationAdjList(graph), circles);
} }
std::vector<ir::Node *> TopologySortOperations(const Graph &graph) { std::vector<ir::Node *> TopologySortOperations(const Graph &graph) {
std::map<ir::Node *, std::unordered_set<ir::Node *>> adj_list = std::map<ir::Node *, std::unordered_set<ir::Node *>> adj_list =
BuildOperationAdjList(graph); BuildOperationAdjList(graph);
PADDLE_ENFORCE(!HasCircleInternal(adj_list)); PADDLE_ENFORCE(!HasCircleInternal(adj_list, nullptr));
std::unordered_set<ir::Node *> visited; std::unordered_set<ir::Node *> visited;
std::vector<ir::Node *> ret; std::vector<ir::Node *> ret;
for (auto adj : adj_list) { for (auto adj : adj_list) {

@ -28,6 +28,11 @@ namespace ir {
// Test if the graph contains circle. // Test if the graph contains circle.
bool HasCircle(const Graph &graph); bool HasCircle(const Graph &graph);
// Debugging helper: returns true when the graph contains a circle and
// appends each recorded circle (a list of nodes) to `circles`.
// NOTE(review): the search appears to stop as soon as the first circle has
// been recorded, so `circles` may end up with a single entry rather than
// every circle in the graph -- confirm before relying on "all circles".
bool FindCircleSubGraph(const Graph &graph,
std::vector<std::vector<ir::Node *>> *circles);
size_t GraphNum(const Graph &graph); size_t GraphNum(const Graph &graph);
// Topology Sort the operations in the graph from inputs to outputs. // Topology Sort the operations in the graph from inputs to outputs.

@ -195,6 +195,17 @@ void BuildTwoGraphs(Graph* g) {
// v4->outputs.push_back(o5); // v4->outputs.push_back(o5);
} }
// Checks that FindCircleSubGraph detects the circle in the graph built by
// BuildCircleGraph and records exactly one circle for it.
TEST(GraphHelperTest, Circles) {
  ProgramDesc prog;
  Graph g(prog);
  BuildCircleGraph(&g);

  std::vector<std::vector<ir::Node*>> circles;
  ASSERT_TRUE(FindCircleSubGraph(g, &circles));
  // ASSERT_EQ compares two separate arguments; passing a single `a == b`
  // expression does not compile.
  ASSERT_EQ(circles.size(), 1UL);
}
TEST(GraphHelperTest, GraphNum) { TEST(GraphHelperTest, GraphNum) {
ProgramDesc prog; ProgramDesc prog;

@ -32,7 +32,7 @@ class TestParallelExecutorBase(unittest.TestCase):
def check_network_convergence(self, def check_network_convergence(self,
method, method,
use_cuda=True, use_cuda=True,
memory_opt=True, memory_opt=False,
iter=50, iter=50,
batch_size=None, batch_size=None,
allow_op_delay=False, allow_op_delay=False,
@ -67,8 +67,6 @@ class TestParallelExecutorBase(unittest.TestCase):
if memory_opt: if memory_opt:
fluid.memory_optimize(main) fluid.memory_optimize(main)
with open("program_model.txt", "w") as f:
f.write(str(main))
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
exe.run(startup) exe.run(startup)
@ -82,9 +80,10 @@ class TestParallelExecutorBase(unittest.TestCase):
build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops
build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv
build_strategy.memory_optimize = use_ir_memory_optimize build_strategy.memory_optimize = use_ir_memory_optimize
build_strategy.enable_inplace = enable_inplace # python memory optimization is conflict with inplace pass.
# Use ir graph memory optimization after inplace pass is the correct way.
build_strategy.enable_inplace = False if memory_opt else enable_inplace
build_strategy.enable_sequential_execution = enable_sequential_execution build_strategy.enable_sequential_execution = enable_sequential_execution
build_strategy.debug_graphviz_path = "debug_ir_graph_"
if use_cuda and core.is_compiled_with_cuda(): if use_cuda and core.is_compiled_with_cuda():
build_strategy.remove_unnecessary_lock = True build_strategy.remove_unnecessary_lock = True

@ -46,7 +46,10 @@ class TestIrInplace(TestParallelExecutorBase):
def setUpClass(cls): def setUpClass(cls):
os.environ['CPU_NUM'] = str(4) os.environ['CPU_NUM'] = str(4)
def _fc_with_batchnorm(self, ir_memory_optimize, enable_inplace): def _fc_with_batchnorm(self,
ir_memory_optimize,
enable_inplace,
memory_opt=False):
np.random.seed(5) np.random.seed(5)
img = np.random.random(size=[32, 784]).astype(np.float32) img = np.random.random(size=[32, 784]).astype(np.float32)
label = np.ones(shape=[32, 1], dtype='int64') label = np.ones(shape=[32, 1], dtype='int64')
@ -55,7 +58,7 @@ class TestIrInplace(TestParallelExecutorBase):
feed_dict={"image": img, feed_dict={"image": img,
"label": label}, "label": label},
use_cuda=True, use_cuda=True,
memory_opt=False, # inplace is conflict with memory opt memory_opt=memory_opt,
use_ir_memory_optimize=ir_memory_optimize, use_ir_memory_optimize=ir_memory_optimize,
enable_inplace=enable_inplace) enable_inplace=enable_inplace)
@ -67,3 +70,10 @@ class TestIrInplace(TestParallelExecutorBase):
self.assertAlmostEqual(loss00, loss10, delta=delta) self.assertAlmostEqual(loss00, loss10, delta=delta)
self.assertAlmostEqual(loss00, loss01, delta=delta) self.assertAlmostEqual(loss00, loss01, delta=delta)
self.assertAlmostEqual(loss00, loss11, delta=delta) self.assertAlmostEqual(loss00, loss11, delta=delta)
def test_fc_with_batchnorm_memory_opt(self, delta=1e-3):
loss00 = self._fc_with_batchnorm(False, True, False)
loss10 = self._fc_with_batchnorm(False, True, True)
loss10 = self._fc_with_batchnorm(True, True, True)
self.assertAlmostEqual(loss00, loss10, delta=delta)
self.assertAlmostEqual(loss00, loss01, delta=delta)

Loading…
Cancel
Save