From 5ec2fb0c93d0e799ea1fc215be0072488399c31e Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Fri, 31 Aug 2018 11:32:35 +0000
Subject: [PATCH 1/5] add FlexibleDFS for finding a path between two nodes

---
 .../inference/analysis/data_flow_graph.cc     | 37 ++++++++++
 .../inference/analysis/data_flow_graph.h      |  3 +
 .../analysis/data_flow_graph_tester.cc        | 71 +++++++++++++++++++
 3 files changed, 111 insertions(+)

diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc
index 100a7504b8..e4f4bbf43c 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph.cc
@@ -480,6 +480,8 @@ void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) {
     for (auto *out : op_nodes[i]->outlinks) {
       if (follow_up_input_names.count(out->name())) {
         filtered_subgraph_outlinks.push_back(out);
+      } else {
+        out->SetDeleted();
       }
     }
     PADDLE_ENFORCE_GE(filtered_subgraph_outlinks.size(), 1UL);
@@ -487,6 +489,41 @@ void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) {
   }
 }
 
+void FlexibleDFS(const std::vector<Node *> &source, bool reverse,
+                 const std::function<bool(const Node *)> &enter,
+                 const std::function<bool(const Node *)> &leave) {
+  typedef struct {
+    const Node *node;
+    bool leave;
+  } FNode;
+  std::vector<FNode> stack;
+  for (auto &node : source) {
+    stack.push_back(FNode{node, false});
+  }
+  std::unordered_set<const Node *> visited;
+  while (!stack.empty()) {
+    auto fnode = stack.back();
+    stack.pop_back();
+
+    if (fnode.leave) {
+      if (leave && !leave(fnode.node)) return;
+    }
+    if (visited.count(fnode.node)) continue;
+    visited.insert(fnode.node);
+
+    if (enter && !enter(fnode.node)) return;
+
+    if (leave) stack.push_back(FNode{fnode.node, true});
+    const std::vector<Node *> iter_nodes =
+        reverse == true ? fnode.node->inlinks : fnode.node->outlinks;
+    for (const Node *node : iter_nodes) {
+      if (!visited.count(node)) {
+        stack.push_back(FNode{node, false});
+      }
+    }
+  }
+}
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h
index 437e097acd..4fefc175f3 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph.h
+++ b/paddle/fluid/inference/analysis/data_flow_graph.h
@@ -204,6 +204,9 @@ std::pair<std::vector<Node *>, std::vector<Node *>>
 ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);  // NOLINT
 
 void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph);
+void FlexibleDFS(const std::vector<Node *> &source, bool reverse,
+                 const std::function<bool(const Node *)> &enter,
+                 const std::function<bool(const Node *)> &leave);
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
index 1682011c3d..040ca19514 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
@@ -160,6 +160,77 @@ TEST(DataFlowGraph, Build_IR_Graph) {
   ASSERT_EQ(graph.nodes.size(), ir_graph.Nodes().size());
 }
 
+// FlexibleDFS
+/*
+ * Graph topology
+ * inputs: 0
+ * 0 -> 1
+ * 1 -> 2
+ * 1 -> 3
+ * 3 -> 4
+ * 4 -> 5
+ * 5 -> 2
+ */
+TEST(DataFlowGraph, flexibledfs) {
+  DataFlowGraph graph;
+
+  for (int i = 0; i < 6; i++) {
+    auto* node = graph.nodes.Create(Node::Type::kValue);
+    node->SetName("node-" + std::to_string(i));
+  }
+
+  auto add_link = [&](int i, int j) {
+    Node* source = graph.nodes.GetMutable(i);
+    Node* target = graph.nodes.GetMutable(j);
+    target->inlinks.push_back(source);
+    source->outlinks.push_back(target);
+  };
+
+  add_link(0, 1);
+  add_link(1, 2);
+  add_link(1, 3);
+  add_link(3, 4);
+  add_link(4, 5);
+  add_link(5, 2);
+  graph.Build();
+
+  std::vector<const Node*> order;
+  FlexibleDFS(graph.inputs(), false, nullptr, [&order](const Node* n) {
+    order.push_back(n);
+    return true;
+  });
+
+  ASSERT_EQ(order.size(), 6UL);
+
+  order.clear();
+  // reverse dfs
+  FlexibleDFS(graph.outputs(), true, nullptr, [&order](const Node* n) {
+    order.push_back(n);
+    return true;
+  });
+
+  ASSERT_EQ(order.size(), 6UL);
+
+  // Ignoring the direct edge 1 -> 2, node 2's other inputs still reach node 1.
+  Node* last_node = graph.nodes.GetMutable(2);
+  Node* direct_node = graph.nodes.GetMutable(1);
+  std::vector<Node*> source_nodes;
+  for (Node* node : last_node->inlinks) {
+    if (node != direct_node) source_nodes.push_back(node);
+  }
+
+  bool has_cycle = false;
+  FlexibleDFS(source_nodes, true, nullptr,
+              [&has_cycle, direct_node](const Node* n) {
+                if (n == direct_node) {
+                  has_cycle = true;
+                  return false;
+                }
+                return true;
+              });
+  ASSERT_TRUE(has_cycle);
+}
+
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle

From 03ff4f689213a6dc2c469dfd0c2cffe16e6b418d Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Tue, 11 Sep 2018 08:27:24 +0000
Subject: [PATCH 2/5] fix subgraph bug!

---
 .../inference/analysis/data_flow_graph.cc     |  39 +---
 .../inference/analysis/data_flow_graph.h      |   3 -
 .../analysis/data_flow_graph_to_fluid_pass.cc |  25 ++-
 .../inference/analysis/subgraph_splitter.cc   | 186 +++++++++++++++++-
 .../analysis/subgraph_splitter_tester.cc      |   2 +-
 paddle/fluid/operators/tensorrt_engine_op.h   |  20 +-
 6 files changed, 215 insertions(+), 60 deletions(-)

diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc
index e4f4bbf43c..8c7d58678f 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph.cc
@@ -440,6 +440,7 @@ ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) {  // NOLINT
     }
     return false;
   };
+
   for (auto &node : graph) {
     for (auto *in : node->inlinks) {
       // The Value that is written by nodes inside a sub-graph shouldn't be the
@@ -459,6 +460,7 @@ ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph) {  // NOLINT
       std::vector<Node *>(outputs.begin(), outputs.end()));
 }
 
+// Filter the intermediate results of the subgraph node.
 void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) {
   std::vector<Node *> op_nodes;
   for (auto &node : GraphTraits<DataFlowGraph>(*graph).nodes_in_TS()) {
@@ -484,46 +486,11 @@ void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) {
         out->SetDeleted();
       }
     }
-    PADDLE_ENFORCE_GE(filtered_subgraph_outlinks.size(), 1UL);
+    // The filtered_subgraph_outlinks may be empty.
     op_nodes[i]->outlinks = filtered_subgraph_outlinks;
   }
 }
 
-void FlexibleDFS(const std::vector<Node *> &source, bool reverse,
-                 const std::function<bool(const Node *)> &enter,
-                 const std::function<bool(const Node *)> &leave) {
-  typedef struct {
-    const Node *node;
-    bool leave;
-  } FNode;
-  std::vector<FNode> stack;
-  for (auto &node : source) {
-    stack.push_back(FNode{node, false});
-  }
-  std::unordered_set<const Node *> visited;
-  while (!stack.empty()) {
-    auto fnode = stack.back();
-    stack.pop_back();
-
-    if (fnode.leave) {
-      if (leave && !leave(fnode.node)) return;
-    }
-    if (visited.count(fnode.node)) continue;
-    visited.insert(fnode.node);
-
-    if (enter && !enter(fnode.node)) return;
-
-    if (leave) stack.push_back(FNode{fnode.node, true});
-    const std::vector<Node *> iter_nodes =
-        reverse == true ? fnode.node->inlinks : fnode.node->outlinks;
-    for (const Node *node : iter_nodes) {
-      if (!visited.count(node)) {
-        stack.push_back(FNode{node, false});
-      }
-    }
-  }
-}
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h
index 4fefc175f3..437e097acd 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph.h
+++ b/paddle/fluid/inference/analysis/data_flow_graph.h
@@ -204,9 +204,6 @@ std::pair<std::vector<Node *>, std::vector<Node *>>
 ExtractInputAndOutputOfSubGraph(std::vector<Node *> &graph);  // NOLINT
 
 void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph);
-void FlexibleDFS(const std::vector<Node *> &source, bool reverse,
-                 const std::function<bool(const Node *)> &enter,
-                 const std::function<bool(const Node *)> &leave);
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
index 80c85555e7..47e9752ff2 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
@@ -106,20 +106,23 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
 
   // collect inputs
   std::unordered_set<std::string> input_names;
+  std::unordered_set<std::string> input_names_with_id;
   for (auto *x : func->inlinks) {
     input_names.insert(x->name());
+    input_names_with_id.insert(x->name() + std::to_string(x->id()));
   }
   desc.SetInput(
       "Xs", std::vector<std::string>(input_names.begin(), input_names.end()));
 
   std::unordered_set<std::string> output_names;
+  std::unordered_set<std::string> output_names_with_id;
   for (auto *x : func->outlinks) {
     output_names.insert(x->name());
+    output_names_with_id.insert(x->name() + std::to_string(x->id()));
   }
 
-  std::vector<std::string> output_temp(output_names.begin(),
-                                       output_names.end());
-  desc.SetOutput("Ys", output_temp);
+  desc.SetOutput(
+      "Ys", std::vector<std::string>(output_names.begin(), output_names.end()));
   desc.SetType("tensorrt_engine");
 
   std::unordered_map<std::string, std::string> output_name_map;
@@ -153,11 +156,12 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
       std::vector<std::string> replaced_names;
       for (int k = 0; k < in_var->arguments_size(); k++) {
         std::string arg_value = in_var->arguments(k);
-        if (input_names.count(arg_value)) {
+        std::string arg_value_with_id =
+            arg_value + std::to_string(var2id[arg_value]);
+        if (input_names_with_id.count(arg_value_with_id)) {
           replaced_names.push_back(arg_value);
         } else {
-          replaced_names.push_back(arg_value +
-                                   std::to_string(var2id[arg_value]));
+          replaced_names.push_back(arg_value_with_id);
         }
       }
       in_var->clear_arguments();
@@ -176,11 +180,12 @@ void CreateTrtEngineOp(Node *node, const DataFlowGraph &graph,
       std::vector<std::string> replaced_names;
       for (int k = 0; k < out_var->arguments_size(); k++) {
         std::string arg_value = out_var->arguments(k);
-        if (output_names.count(arg_value)) {
-          output_name_map[arg_value] =
-              arg_value + std::to_string(var2id[arg_value]);
+        std::string arg_value_with_id =
+            arg_value + std::to_string(var2id[arg_value]);
+        if (output_names_with_id.count(arg_value_with_id)) {
+          output_name_map[arg_value] = arg_value_with_id;
         }
-        replaced_names.push_back(arg_value + std::to_string(var2id[arg_value]));
+        replaced_names.push_back(arg_value_with_id);
       }
       out_var->clear_arguments();
       for (size_t k = 0; k < replaced_names.size(); k++) {
diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc
index 670a8de667..857375fc21 100644
--- a/paddle/fluid/inference/analysis/subgraph_splitter.cc
+++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc
@@ -74,13 +74,126 @@ void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) {
   node_map.at(b)->attr(kUnionFindParent).Int32() = a_ancestor;
 }
 
+// This is a simple representation of a graph.
+// The BriefNode holds a pointer to the original Node.
+// This is to avoid changing the original graph
+// in the process of the trt graph analysis.
+struct BriefNode {
+  explicit BriefNode(Node *n) { node = n; }
+  Node *node;
+  std::vector<BriefNode *> inlinks;
+  std::vector<BriefNode *> outlinks;
+};
+
+void UnionContractedNodes(const std::unordered_map<int, BriefNode *> &node_map,
+                          int src_id, int dst_id) {
+  // merge the two adjacent nodes into one node.
+  BriefNode *src_node = node_map.at(src_id);
+  BriefNode *dst_node = node_map.at(dst_id);
+
+  std::unordered_set<BriefNode *> inputs(src_node->inlinks.begin(),
+                                         src_node->inlinks.end());
+  std::unordered_set<BriefNode *> outputs;
+
+  for (auto *n : src_node->outlinks) {
+    if (n != dst_node) outputs.insert(n);
+  }
+
+  // Add the inlinks and outlinks of the dst node to the src node.
+  std::vector<BriefNode *> dst_in_nodes = dst_node->inlinks;
+  for (BriefNode *node : dst_in_nodes) {
+    if (node != src_node) {
+      inputs.insert(node);
+    }
+  }
+
+  std::vector<BriefNode *> dst_out_nodes = dst_node->outlinks;
+  for (BriefNode *node : dst_out_nodes) {
+    outputs.insert(node);
+  }
+
+  // update the src and dst nodes' inlinks and outlinks.
+  src_node->inlinks =
+      std::move(std::vector<BriefNode *>(inputs.begin(), inputs.end()));
+  src_node->outlinks =
+      std::move(std::vector<BriefNode *>(outputs.begin(), outputs.end()));
+  dst_node->inlinks.clear();
+  dst_node->outlinks.clear();
+
+  auto inlink_or_outlink_cleaner = [&](std::vector<BriefNode *> &nodes) {
+    for (auto *&n : nodes) {
+      if (n == src_node || n == dst_node) {
+        n = src_node;
+      }
+    }
+  };
+  // In the neighbors' link lists, redirect every inlink and outlink that
+  // pointed at the dst node to the src node.
+  for (auto *node : src_node->inlinks) {
+    inlink_or_outlink_cleaner(node->outlinks);
+  }
+
+  for (auto *node : src_node->outlinks) {
+    inlink_or_outlink_cleaner(node->inlinks);
+  }
+}
+
+// FlexibleDfS
+// If reverse is true, do reverse dfs.
+// If enter func is not nullptr, calls enter(node) before visiting any children
+// of node.
+// If leave func is not nullptr, calls leave(node) after all nodes reachable
+// from node have been visited.
+void FlexibleDFS(const std::vector<BriefNode *> &source, bool reverse,
+                 const std::function<bool(const BriefNode *)> &enter,
+                 const std::function<bool(const BriefNode *)> &leave) {
+  typedef struct {
+    const BriefNode *node;
+    bool leave;
+  } FNode;
+
+  std::vector<FNode> stack;
+  for (auto &node : source) {
+    stack.push_back(FNode{node, false});
+  }
+  std::unordered_set<const BriefNode *> visited;
+  while (!stack.empty()) {
+    auto fnode = stack.back();
+    stack.pop_back();
+
+    if (fnode.leave) {
+      if (leave && !leave(fnode.node)) return;
+    }
+    if (visited.count(fnode.node)) continue;
+    visited.insert(fnode.node);
+
+    if (enter && !enter(fnode.node)) return;
+
+    if (leave) stack.push_back(FNode{fnode.node, true});
+    const std::vector<BriefNode *> iter_nodes =
+        reverse == true ? fnode.node->inlinks : fnode.node->outlinks;
+    for (const BriefNode *node : iter_nodes) {
+      if (!visited.count(node)) {
+        stack.push_back(FNode{node, false});
+      }
+    }
+  }
+}
+
 std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
+  // Run the Extract algorithm to find all subgraphs.
   std::vector<Node *> marked_nodes;
+  // We use brief_node_map to represent the original graph in order to avoid
+  // changing the original graph.
+  std::unordered_map<int, BriefNode *> brief_node_map;
+
   for (auto &node : GraphTraits<DataFlowGraph>(*graph_).nodes_in_TS()) {
+    brief_node_map[node.id()] = new BriefNode(&node);
     if (node.attr(kMarkerAttrName).Bool()) {
       marked_nodes.push_back(&node);
     }
   }
+
   // extract sub-graphs in the marked node set, use Union Find algorithm.
   node_map_t node_map;  // id to ptr
   for (auto *n : marked_nodes) {
@@ -88,11 +201,73 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
     n->attr(kUnionFindParent).Int32() = n->id();
     node_map[n->id()] = n;
   }
-  std::unordered_set<Node *> visited;
-  for (auto *n : marked_nodes) {
-    for (auto *out : n->outlinks) {
-      if (node_map.count(out->id())) {
-        UnionFindCombine(node_map, n->id(), out->id());
+
+  // create brief node map
+  for (auto &itr : brief_node_map) {
+    for (Node *node : itr.second->node->inlinks) {
+      itr.second->inlinks.push_back(brief_node_map[node->id()]);
+    }
+
+    for (Node *node : itr.second->node->outlinks) {
+      itr.second->outlinks.push_back(brief_node_map[node->id()]);
+    }
+  }
+
+  for (auto &itr : brief_node_map) {
+    BriefNode *brief_node = itr.second;
+
+    if (!brief_node->node->attr(kMarkerAttrName).Bool()) {
+      VLOG(4) << brief_node->node->id() << " node is not a trt candidate.";
+      continue;
+    }
+
+    // Our algorithm must guarantee that:
+    // 1. The graph is always a directed acyclic graph (DAG).
+    // 2. If there is a path in the subgraph from X to Y (X and Y are both
+    // nodes
+    // in the subgraph), then all paths from X to Y are in the subgraph.
+    //
+    // In order to achieve the above guarantee.
+    // For adjacent nodes src -> dst.
+    // 1. Get all dst input nodes except src.
+    // 2. Reverse DFS from those input nodes.
+    // 3. If there is a path from input nodes to src,
+    //    then the src and dst nodes can not be fused into one node,
+    //    otherwise it can be done.
+
+    while (true) {
+      std::unordered_set<BriefNode *> contract_nodes;
+      for (auto *out : brief_node->outlinks) {
+        // must be a trt candidate
+        if (!out->node->attr(kMarkerAttrName).Bool()) continue;
+        // get all dst input nodes except src.
+        std::vector<BriefNode *> source_nodes;
+        for (auto *n : out->inlinks) {
+          if (n != brief_node) {
+            source_nodes.push_back(n);
+          }
+        }
+
+        // Reverse DFS from the source_nodes.
+        bool have_excess_path = false;
+        FlexibleDFS(source_nodes, true, nullptr,
+                    [&have_excess_path, brief_node](const BriefNode *n) {
+                      if (n == brief_node) {
+                        have_excess_path = true;
+                        return false;
+                      }
+                      return true;
+                    });
+        if (have_excess_path) continue;
+        contract_nodes.insert(out);
+      }
+      if (contract_nodes.empty()) break;
+
+      for (auto dst_node : contract_nodes) {
+        UnionFindCombine(node_map, brief_node->node->id(),
+                         dst_node->node->id());
+        UnionContractedNodes(brief_node_map, brief_node->node->id(),
+                             dst_node->node->id());
       }
     }
   }
@@ -128,6 +303,7 @@ void SubGraphFuse::ReplaceNodesWithSubGraphs() {
     auto io = ExtractInputAndOutputOfSubGraph(subgraph);
     block_node->inlinks = std::move(io.first);
     block_node->outlinks = std::move(io.second);
+
     for (auto *node : subgraph) {
       // TODO(Superjomn) need a unified mechanism to treat deleted node in each
       // pass.
diff --git a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
index 39cc433b40..531a170512 100644
--- a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
+++ b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc
@@ -82,7 +82,7 @@ TEST(SubGraphSplitter, Fuse) {
 
   // At least one node should be deleted.
   ASSERT_EQ(dfg.nodes.size(), count0 + 1);  // added a new FunctionBlock
-  ASSERT_EQ(6, count1);
+  ASSERT_EQ(11, count1);
 }
 
 }  // namespace analysis
diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h
index bc556ab364..395d8bcc07 100644
--- a/paddle/fluid/operators/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt_engine_op.h
@@ -160,11 +160,21 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
           fluid_t->mutable_data<float>(platform::CUDAPlace(
               boost::get<platform::CUDAPlace>(context.GetPlace()).device)),
           size * sizeof(float));
-      //} else {
-      //  engine->GetOutputInGPU(
-      //      y, fluid_t->mutable_data<float>(platform::CUDAPlace()),
-      //      size * sizeof(float));
-      //}
+
+      // TODO(zhaolong) : delete it sometime
+      /* THIS CODE JUST FOR TEST
+      std::cout << output_maps[output_index] << std::endl;
+      platform::CPUPlace cpu_place;
+      framework::LoDTensor temp_tensor;
+      temp_tensor.Resize(framework::make_ddim(ddim));
+      auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
+
+      TensorCopySync(*fluid_t, cpu_place, &temp_tensor);
+      for (int i = 0; i < size; i++) {
+        std::cout << temp_data[i] << " ";
+      }
+      std::cout << std::endl;
+      */
       output_index += 1;
     }

From df161e08f0974b5fc77a62714c94bcdb8f04c412 Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Tue, 11 Sep 2018 08:36:29 +0000
Subject: [PATCH 3/5] delete unused UT

---
 .../analysis/data_flow_graph_tester.cc        | 71 -------------------
 .../inference/analysis/subgraph_splitter.cc   |  2 +-
 2 files changed, 1 insertion(+), 72 deletions(-)

diff --git a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
index 040ca19514..1682011c3d 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc
@@ -160,77 +160,6 @@ TEST(DataFlowGraph, Build_IR_Graph) {
   ASSERT_EQ(graph.nodes.size(), ir_graph.Nodes().size());
 }
 
-// FlexibleDFS
-/*
- * Graph topology
- * inputs: 0
- * 0 -> 1
- * 1 -> 2
- * 1 -> 3
- * 3 -> 4
- * 4 -> 5
- * 5 -> 2
- */
-TEST(DataFlowGraph, flexibledfs) {
-  DataFlowGraph graph;
-
-  for (int i = 0; i < 6; i++) {
-    auto* node = graph.nodes.Create(Node::Type::kValue);
-    node->SetName("node-" + std::to_string(i));
-  }
-
-  auto add_link = [&](int i, int j) {
-    Node* source = graph.nodes.GetMutable(i);
-    Node* target = graph.nodes.GetMutable(j);
-    target->inlinks.push_back(source);
-    source->outlinks.push_back(target);
-  };
-
-  add_link(0, 1);
-  add_link(1, 2);
-  add_link(1, 3);
-  add_link(3, 4);
-  add_link(4, 5);
-  add_link(5, 2);
-  graph.Build();
-
-  std::vector<const Node*> order;
-  FlexibleDFS(graph.inputs(), false, nullptr, [&order](const Node* n) {
-    order.push_back(n);
-    return true;
-  });
-
-  ASSERT_EQ(order.size(), 6UL);
-
-  order.clear();
-  // reverse dfs
-  FlexibleDFS(graph.outputs(), true, nullptr, [&order](const Node* n) {
-    order.push_back(n);
-    return true;
-  });
-
-  ASSERT_EQ(order.size(), 6UL);
-
-  // Ignoring the direct edge 1 -> 2, node 2's other inputs still reach node 1.
-  Node* last_node = graph.nodes.GetMutable(2);
-  Node* direct_node = graph.nodes.GetMutable(1);
-  std::vector<Node*> source_nodes;
-  for (Node* node : last_node->inlinks) {
-    if (node != direct_node) source_nodes.push_back(node);
-  }
-
-  bool has_cycle = false;
-  FlexibleDFS(source_nodes, true, nullptr,
-              [&has_cycle, direct_node](const Node* n) {
-                if (n == direct_node) {
-                  has_cycle = true;
-                  return false;
-                }
-                return true;
-              });
-  ASSERT_TRUE(has_cycle);
-}
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc
index 857375fc21..773fceeeb2 100644
--- a/paddle/fluid/inference/analysis/subgraph_splitter.cc
+++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc
@@ -138,7 +138,7 @@ void UnionContractedNodes(const std::unordered_map<int, BriefNode *> &node_map,
   }
 }
 
-// FlexibleDfS
+// FlexibleDFS
 // If reverse is true, do reverse dfs.
 // If enter func is not nullptr, calls enter(node) before visiting any children
 // of node.

From 49bafc05bf7380874c92bd2954c5c96bca695ee4 Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Thu, 13 Sep 2018 05:35:29 +0000
Subject: [PATCH 4/5] fix comments and set name for trt layer and ITensor

---
 .../fluid/inference/analysis/subgraph_splitter.cc  | 12 ++++++++++--
 .../inference/tensorrt/convert/activation_op.cc    |  2 ++
 .../inference/tensorrt/convert/batch_norm_op.cc    |  2 ++
 .../fluid/inference/tensorrt/convert/concat_op.cc  |  4 ++++
 .../fluid/inference/tensorrt/convert/conv2d_op.cc  |  5 +++++
 .../inference/tensorrt/convert/elementwise_op.cc   |  4 ++++
 paddle/fluid/inference/tensorrt/convert/fc_op.cc   |  2 ++
 .../fluid/inference/tensorrt/convert/pool2d_op.cc  |  2 ++
 paddle/fluid/operators/tensorrt_engine_op.h        | 14 --------------
 9 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc
index 773fceeeb2..c3a2dbf9d1 100644
--- a/paddle/fluid/inference/analysis/subgraph_splitter.cc
+++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc
@@ -85,6 +85,14 @@ struct BriefNode {
   std::vector<BriefNode *> outlinks;
 };
 
+// Union two adjacent BriefNodes.
+// Suppose we have two adjacent nodes src and dst.
+// We will perform the following operations:
+// 1. add all inputs (except src) of dst to src inlinks.
+// 2. add all outputs of dst to src outlinks.
+// 3. in all of dst's inputs and outputs, redirect the corresponding
+//    inlinks and outlinks to the src node.
+// 4. delete all dst's inlinks and outlinks.
 void UnionContractedNodes(const std::unordered_map<int, BriefNode *> &node_map,
                           int src_id, int dst_id) {
   // merge the two adjacent nodes into one node.
@@ -224,8 +232,8 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
     // Our algorithm must guarantee that:
     // 1. The graph is always a directed acyclic graph (DAG).
     // 2. If there is a path in the subgraph from X to Y (X and Y are both
-    // nodes
-    // in the subgraph), then all paths from X to Y are in the subgraph.
+    // nodes in the subgraph), then all paths from X to Y are in the
+    // subgraph.
     //
     // In order to achieve the above guarantee.
     // For adjacent nodes src -> dst.
diff --git a/paddle/fluid/inference/tensorrt/convert/activation_op.cc b/paddle/fluid/inference/tensorrt/convert/activation_op.cc
index e1cace9cc1..8168cdff1b 100644
--- a/paddle/fluid/inference/tensorrt/convert/activation_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/activation_op.cc
@@ -35,6 +35,8 @@ class ReluOpConverter : public OpConverter {
         engine_, Activation, *const_cast<nvinfer1::ITensor *>(input_tensor),
         nvinfer1::ActivationType::kRELU);
     auto output_name = op_desc.Output("Out")[0];
+    layer->setName(("relu (Output: " + output_name + ")").c_str());
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->SetITensor(output_name, layer->getOutput(0));
     if (test_mode) {  // the test framework can not determine which is the
                       // output, so place the declaration inside.
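Note: the contraction-safety rule documented in the subgraph_splitter.cc comment hunks above is easiest to see on the diamond graph from the unit test that patch 3 deletes. The following is a minimal, self-contained sketch of that check, illustration only and not part of any patch; MiniNode and ReachesViaInlinks are hypothetical stand-ins for BriefNode and FlexibleDFS.

#include <iostream>
#include <unordered_set>
#include <vector>

struct MiniNode {
  std::vector<MiniNode *> inlinks;
  std::vector<MiniNode *> outlinks;
};

// Walk backwards along inlinks from `sources`; report whether `target` shows up.
bool ReachesViaInlinks(const std::vector<MiniNode *> &sources,
                       const MiniNode *target) {
  std::vector<const MiniNode *> stack(sources.begin(), sources.end());
  std::unordered_set<const MiniNode *> visited;
  while (!stack.empty()) {
    const MiniNode *n = stack.back();
    stack.pop_back();
    if (!visited.insert(n).second) continue;
    if (n == target) return true;
    for (const MiniNode *in : n->inlinks) stack.push_back(in);
  }
  return false;
}

int main() {
  // Topology of the deleted FlexibleDFS test:
  // 0 -> 1, 1 -> 2, 1 -> 3, 3 -> 4, 4 -> 5, 5 -> 2
  std::vector<MiniNode> nodes(6);
  auto link = [&](int i, int j) {
    nodes[i].outlinks.push_back(&nodes[j]);
    nodes[j].inlinks.push_back(&nodes[i]);
  };
  link(0, 1);
  link(1, 2);
  link(1, 3);
  link(3, 4);
  link(4, 5);
  link(5, 2);

  // Can src = 1 absorb dst = 2? Collect dst's inputs other than src (node 5)
  // and reverse-DFS from them; reaching src means an outside path
  // src -> 3 -> 4 -> 5 -> dst exists, so fusing 1 and 2 would close a cycle.
  MiniNode *src = &nodes[1];
  MiniNode *dst = &nodes[2];
  std::vector<MiniNode *> other_inputs;
  for (MiniNode *in : dst->inlinks) {
    if (in != src) other_inputs.push_back(in);
  }
  bool unsafe = ReachesViaInlinks(other_inputs, src);
  std::cout << (unsafe ? "skip fusion: would form a cycle" : "safe to fuse")
            << std::endl;
  return 0;
}

Compiled standalone, this prints "skip fusion: would form a cycle": the reverse walk 5 -> 4 -> 3 -> 1 reaches the src node, which is exactly the have_excess_path condition in ExtractSubGraphs.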
diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
index 94f8b0ae56..3330af2da6 100644
--- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
@@ -116,6 +116,8 @@ class BatchNormOpConverter : public OpConverter {
                     scale_weights.get(), power_weights.get());
 
     auto output_name = op_desc.Output("Y").front();
+    layer->setName(("batch_norm (Output: " + output_name + ")").c_str());
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->weight_map[op_desc.Input("Bias").front()] =
         std::move(combile_bias_tensor);
     engine_->weight_map[op_desc.Input("Scale").front()] =
diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc
index bb9627bf95..2983e91cb2 100644
--- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc
@@ -30,7 +30,9 @@ class ConcatOpConverter : public OpConverter {
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     std::vector<nvinfer1::ITensor *> itensors;
+    std::cout << "Concat op: " << std::endl;
     for (auto& input_name : op_desc.Input("X")) {
+      std::cout << input_name << std::endl;
       itensors.push_back(engine_->GetITensor(input_name));
     }
     int axis = boost::get<int>(op_desc.GetAttr("axis"));
@@ -42,6 +44,8 @@ class ConcatOpConverter : public OpConverter {
     axis = axis - 1;  // Remove batch dim
     layer->setAxis(axis);
     auto output_name = op_desc.Output("Out")[0];
+    layer->setName(("concat (Output: " + output_name + ")").c_str());
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->SetITensor(output_name, layer->getOutput(0));
     if (test_mode) {  // the test framework can not determine which is the
                       // output, so place the declaration inside.
diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
index 841a95db38..022e43a571 100644
--- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
@@ -26,6 +26,9 @@ class Conv2dOpConverter : public OpConverter {
         << "convert a fluid conv2d op to tensorrt conv layer without bias";
     framework::OpDesc op_desc(op, nullptr);
 
+    std::cout << "Conv op: " << std::endl;
+    std::cout << op_desc.Input("Input").front() << std::endl;
+    std::cout << op_desc.Output("Output").front() << std::endl;
     PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
     PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1);  // Y is a weight
     PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
@@ -78,8 +81,10 @@ class Conv2dOpConverter : public OpConverter {
     layer->setNbGroups(groups);
 
     auto output_name = op_desc.Output("Output").front();
+    layer->setName(("conv2d (Output: " + output_name + ")").c_str());
     engine_->weight_map[op_desc.Input("Filter").front()] =
         std::move(weight_tensor);
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->SetITensor(output_name, layer->getOutput(0));
     if (test_mode) {
       engine_->DeclareOutput(output_name);
diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
index 60a72b4eb5..0a6ce568f1 100644
--- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
@@ -89,6 +89,8 @@ class ElementwiseWeightOpConverter : public OpConverter {
                     shift_weights.get(), scale_weights.get(),
                     power_weights.get());
     auto output_name = op_desc.Output("Out")[0];
+    layer->setName(("elementwise_add (Output: " + output_name + ")").c_str());
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->weight_map[op_desc.Input("Y").front()] = std::move(weight_tensor);
     engine_->SetITensor(output_name, layer->getOutput(0));
     if (test_mode) {  // the test framework can not determine which is the
@@ -137,6 +139,8 @@ class ElementwiseTensorOpConverter : public OpConverter {
         *const_cast<nvinfer1::ITensor *>(Y), op_pair->second);
 
     auto output_name = op_desc.Output("Out")[0];
+    layer->setName(("elementwise (Output: " + output_name + ")").c_str());
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->SetITensor(output_name, layer->getOutput(0));
     if (test_mode) {  // the test framework can not determine which is the
                       // output, so place the declaration inside.
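Note: the converter hunks above give every TensorRT layer and output ITensor a readable name, while patch 2's data_flow_graph_to_fluid_pass.cc hunk disambiguates variables that share a fluid name by appending the graph-node id and records the mapping for engine outputs in output_name_map. A minimal, hypothetical sketch of that id-suffix scheme (the var2id contents here are invented; the real maps are built while walking the subgraph):

#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>

int main() {
  // Invented example: graph node 7 reads a variable named "x", and graph
  // node 9 writes the engine output "out".
  std::unordered_map<std::string, int> var2id = {{"x", 7}, {"out", 9}};

  // The engine outputs, recorded with the id suffix, mirroring
  // output_names_with_id in CreateTrtEngineOp.
  std::unordered_set<std::string> output_names_with_id = {"out9"};

  // Inside the sub-block every argument gets the id suffix, so two nodes
  // that happen to share a fluid variable name can no longer collide.
  // output_name_map remembers how an external output name maps to the
  // internal (suffixed) one.
  std::unordered_map<std::string, std::string> output_name_map;
  for (const auto &kv : var2id) {
    std::string arg_value_with_id = kv.first + std::to_string(kv.second);
    if (output_names_with_id.count(arg_value_with_id)) {
      output_name_map[kv.first] = arg_value_with_id;
    }
  }
  std::cout << "out -> " << output_name_map["out"] << std::endl;  // out9
  return 0;
}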
diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
index ad98d85aae..7c21ecd95d 100644
--- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -107,6 +107,8 @@ class FcOpConverter : public OpConverter {
                                     n_output, tmp_weight.get(), bias.get());
 
     auto output_name = op_desc.Output("Out").front();
+    layer->setName(("fc (Output: " + output_name + ")").c_str());
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->SetITensor(output_name, layer->getOutput(0));
     engine_->weight_map[op_desc.Input("Y").front()] = std::move(tmp);
     if (test_mode) {
diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
index 73f1b28ddf..f9bb66a6e9 100644
--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -72,6 +72,8 @@ class Pool2dOpConverter : public OpConverter {
     layer->setPadding(nv_paddings);
 
     auto output_name = op_desc.Output("Out")[0];
+    layer->setName(("pool2d (Output: " + output_name + ")").c_str());
+    layer->getOutput(0)->setName(output_name.c_str());
     engine_->SetITensor(output_name, layer->getOutput(0));
     if (test_mode) {
       engine_->DeclareOutput(output_name);
diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h
index 395d8bcc07..79e75ea9a0 100644
--- a/paddle/fluid/operators/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt_engine_op.h
@@ -161,20 +161,6 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
               boost::get<platform::CUDAPlace>(context.GetPlace()).device)),
           size * sizeof(float));
 
-      // TODO(zhaolong) : delete it sometime
-      /* THIS CODE JUST FOR TEST
-      std::cout << output_maps[output_index] << std::endl;
-      platform::CPUPlace cpu_place;
-      framework::LoDTensor temp_tensor;
-      temp_tensor.Resize(framework::make_ddim(ddim));
-      auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
-
-      TensorCopySync(*fluid_t, cpu_place, &temp_tensor);
-      for (int i = 0; i < size; i++) {
-        std::cout << temp_data[i] << " ";
-      }
-      std::cout << std::endl;
-      */
       output_index += 1;
     }

From 0092ad32856ea17c494a64b02e51d8bf14a0ad20 Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Thu, 13 Sep 2018 08:08:35 +0000
Subject: [PATCH 5/5] delete unused log

---
 paddle/fluid/inference/tensorrt/convert/concat_op.cc | 2 --
 paddle/fluid/inference/tensorrt/convert/conv2d_op.cc | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc
index 2983e91cb2..a11dfa1e8f 100644
--- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc
@@ -30,9 +30,7 @@ class ConcatOpConverter : public OpConverter {
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     std::vector<nvinfer1::ITensor *> itensors;
-    std::cout << "Concat op: " << std::endl;
     for (auto& input_name : op_desc.Input("X")) {
-      std::cout << input_name << std::endl;
       itensors.push_back(engine_->GetITensor(input_name));
     }
     int axis = boost::get<int>(op_desc.GetAttr("axis"));
diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
index 022e43a571..0a37d3968c 100644
--- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
@@ -26,9 +26,6 @@ class Conv2dOpConverter : public OpConverter {
bias"; framework::OpDesc op_desc(op, nullptr); - std::cout << "Conv op: " << std::endl; - std::cout << op_desc.Input("Input").front() << std::endl; - std::cout << op_desc.Output("Output").front() << std::endl; PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);