diff --git a/ge/graph/build/memory/graph_mem_assigner.cc b/ge/graph/build/memory/graph_mem_assigner.cc index 33fda096..f4df8268 100755 --- a/ge/graph/build/memory/graph_mem_assigner.cc +++ b/ge/graph/build/memory/graph_mem_assigner.cc @@ -430,7 +430,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { GELOGE(FAILED, "node %s has no continuous type!", node->GetName().c_str()); return FAILED; } - GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second), + GE_CHK_STATUS_RET(AssignContinuousInputMemoryWithAtomicProcess(node, iter->second, true), "Assign node %s continuous input memory failed.", node->GetName().c_str()) } for (auto pair : memory_offset_) { @@ -441,7 +441,7 @@ Status GraphMemoryAssigner::ReAssignContinuousMemory(bool is_loop_graph) { } Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type) { + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, bool reverse_refresh) { GELOGI("Current node %s needs continuous input.", node->GetName().c_str()); auto iter = memory_offset_.find(memory_type); if (iter == memory_offset_.end()) { @@ -508,12 +508,16 @@ Status GraphMemoryAssigner::AssignContinuousInputMemory(const ge::NodePtr &node, std::map out2ins; GE_CHK_STATUS_RET(GetAllRef(node, out2ins), "Node: %s get all ref failed", node->GetName().c_str()); // output is beginning offset, set offset for input; only support this case now - if (out2ins.size() == 1 && out2ins.begin()->second == 0) { + if ((out2ins.size() == 1) && (out2ins.begin()->second == 0) && (reverse_refresh)) { + auto peer_output_offset = output_list.at(peer_out_data_anchor->GetIdx()); output_list.at(peer_out_data_anchor->GetIdx()) = output_list_this.at(out2ins.begin()->first); peer_op_desc->SetOutputOffset(output_list); + GELOGI("Node %s out %d ref in %d input node %s, use output offset %ld update %ld", node->GetName().c_str(), + out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), + output_list_this.at(out2ins.begin()->first), peer_output_offset); } else { - GELOGW("Node %s out %d ref in %d with total ref numbers %zu", node->GetName().c_str(), out2ins.begin()->first, - out2ins.begin()->second, out2ins.size()); + GELOGD("Node %s out %d ref in %d input node %s with total ref numbers %zu", node->GetName().c_str(), + out2ins.begin()->first, out2ins.begin()->second, peer_op_desc->GetName().c_str(), out2ins.size()); } // first input is beginning offset mem_offset = output_list.at(peer_out_data_anchor->GetIdx()); @@ -1535,6 +1539,11 @@ ge::Status GraphMemoryAssigner::GetAllRef(const NodePtr &node, map &node_2_continuous_type) { for (const auto &in_node : input_continuous_node->GetInDataNodes()) { + if (in_node->GetType() == VARIABLE) { + GELOGI("node %s 's precursor node %s is variable, do not store.", input_continuous_node->GetName().c_str(), + in_node->GetName().c_str()); + return true; + } auto iter = node_2_continuous_type.find(in_node); // In node's topo order in the front, so function can not be exception auto continuous_type = iter->second; @@ -1560,13 +1569,15 @@ bool GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcessDirectly( } ge::Status GraphMemoryAssigner::AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, - uint32_t continuous_type) { + uint32_t continuous_type, + bool reverse_refresh) { int64_t mem_clean_start = 0; int64_t mem_clean_size = 0; int64_t memory_type = RT_MEMORY_HBM; GE_CHK_STATUS_RET(GetNodeMemoryType(input_continuous_node, memory_type, "input"), "Get node memory type failed."); - auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, continuous_type); + auto ret = AssignContinuousInputMemory(input_continuous_node, mem_clean_start, mem_clean_size, memory_type, + continuous_type, reverse_refresh); if (ret != ge::SUCCESS) { GELOGE(ret, "Assign continuous input memory failed!"); return ret; diff --git a/ge/graph/build/memory/graph_mem_assigner.h b/ge/graph/build/memory/graph_mem_assigner.h index b56c3716..f4d1366d 100755 --- a/ge/graph/build/memory/graph_mem_assigner.h +++ b/ge/graph/build/memory/graph_mem_assigner.h @@ -131,13 +131,14 @@ class GraphMemoryAssigner { std::map &node_2_continuous_type); ge::Status AssignContinuousInputMemoryWithAtomicProcess(const NodePtr &input_continuous_node, - uint32_t continuous_type); + uint32_t continuous_type, bool reverse_refresh=false); ge::Status FilterAtomicNodesForMemoryAssign(map>> &normal_atomic_nodes_map, map> &connecting_output_atomic_nodes); ge::Status AssignContinuousInputMemory(const ge::NodePtr &node, int64_t &continuous_mem_start, - int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type); + int64_t &continuous_mem_size, int64_t memory_type, uint32_t continuous_type, + bool reverse_refresh = false); ge::Status AssignContinuousOutputMemory(const ge::NodePtr &node, int64_t memory_type, uint32_t continuous_type); diff --git a/tests/ut/ge/graph/build/mem_assigner_unittest.cc b/tests/ut/ge/graph/build/mem_assigner_unittest.cc index f53a0732..0024185b 100644 --- a/tests/ut/ge/graph/build/mem_assigner_unittest.cc +++ b/tests/ut/ge/graph/build/mem_assigner_unittest.cc @@ -25,10 +25,12 @@ #include "graph/utils/op_desc_utils.h" #include "graph/utils/tensor_utils.h" #include "omg/omg_inner_types.h" +#include "../passes/graph_builder_utils.h" #define protected public #define private public #include "graph/build/memory/binary_block_mem_assigner.h" +#include "graph/build/memory/graph_mem_assigner.h" #include "graph/build/memory/hybrid_mem_assigner.h" #include "graph/build/memory/max_block_mem_assigner.h" #undef protected @@ -41,7 +43,7 @@ using domi::GetContext; class UtestMemoryAssignerTest : public testing::Test { public: - ge::OpDescPtr createOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { + ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { ge::OpDescPtr op_def = make_shared(name, type); auto desc_temp_ptr = make_shared(); auto desc_temp = *desc_temp_ptr; @@ -55,26 +57,46 @@ class UtestMemoryAssignerTest : public testing::Test { op_def->SetWorkspaceBytes(workspace_bytes); return op_def; } - void make_graph(ge::ComputeGraphPtr graph) { - ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); + ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") { + ge::OpDescPtr op_def = make_shared(name, type); + auto desc_temp_ptr = make_shared(); + auto desc_temp = *desc_temp_ptr; + + TensorUtils::SetSize(desc_temp, 1024); + op_def->AddInputDesc(desc_temp); + + auto desc_output_ptr = make_shared(); + auto desc_output = *desc_output_ptr; + TensorUtils::SetSize(desc_output, 6500); + ge::TensorUtils::SetReuseInput(desc_output, true); + ge::TensorUtils::SetReuseInputIndex(desc_output, 0); + op_def->AddOutputDesc(desc_output); + + std::vector workspace_bytes; + workspace_bytes.push_back(wsByte); + op_def->SetWorkspaceBytes(workspace_bytes); + return op_def; + } + void MakeGraph(ge::ComputeGraphPtr &graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); op_def_a->SetStreamId(0); - ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); op_def_b->SetStreamId(0); - ge::OpDescPtr op_def_c = createOpWithWsSize("C", 16000); + ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000); op_def_c->SetStreamId(1); - ge::OpDescPtr op_def_d = createOpWithWsSize("D", 24000); + ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000); op_def_d->SetStreamId(2); - ge::OpDescPtr op_def_e = createOpWithWsSize("E", 24000); + ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000); op_def_e->SetStreamId(3); - ge::OpDescPtr op_def_f = createOpWithWsSize("F", 30000); + ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000); op_def_f->SetStreamId(2); - ge::OpDescPtr op_def_g = createOpWithWsSize("G", 32000); + ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000); op_def_g->SetStreamId(3); - ge::OpDescPtr op_def_h = createOpWithWsSize("H", 48000); + ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000); op_def_h->SetStreamId(2); - ge::OpDescPtr op_def_i = createOpWithWsSize("I", 60000); + ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000); op_def_i->SetStreamId(2); - ge::OpDescPtr op_def_j = createOpWithWsSize("J", 256000, NETOUTPUT); + ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT); op_def_j->SetStreamId(3); // add node @@ -108,24 +130,10 @@ class UtestMemoryAssignerTest : public testing::Test { graph->TopologicalSorting(); } - void make_reuse_graph(ge::ComputeGraphPtr graph) { - ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); - ge::OpDescPtr op_def_b = createOpWithWsSize("B", 120000); - - ge::OpDescPtr op_def_c = make_shared("C", "Some"); - auto desc_input_ptr = make_shared(); - auto desc_input = *desc_input_ptr; - - TensorUtils::SetSize(desc_input, 1024); - op_def_c->AddInputDesc(desc_input); - - auto desc_output_ptr = make_shared(); - auto desc_output = *desc_output_ptr; - TensorUtils::SetSize(desc_output, 6500); - ge::TensorUtils::SetReuseInput(desc_output, true); - ge::TensorUtils::SetReuseInputIndex(desc_output, 0); - op_def_c->AddOutputDesc(desc_output); - + void MakeReuseGraph(ge::ComputeGraphPtr graph) { + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); + ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000); + ge::OpDescPtr op_def_c = CreateRefOpWithWsSize("C", 120000); ge::OpDescPtr op_def_d = make_shared("D", "CONSTANT"); ge::NodePtr node_a = graph->AddNode(op_def_a); @@ -141,6 +149,47 @@ class UtestMemoryAssignerTest : public testing::Test { graph->TopologicalSorting(); } + ComputeGraphPtr MakeCascadeContinuousMemoryGraph() { + ge::ut::GraphBuilder builder("graph"); + auto data = builder.AddNode("data", "Data", 1, 1); + auto addn1 = builder.AddNode("addn1", "AddN", 1, 1); + auto addn2 = builder.AddNode("addn2", "AddN", 1, 1); + auto addn3 = builder.AddNode("addn3", "AddN", 1, 1); + auto concat1 = builder.AddNode("concat1", "Concat", 2, 1); + auto concat2 = builder.AddNode("concat2", "Concat", 2, 1); + auto netoutput = builder.AddNode("netoutput", "NetOutput", 2, 0); + + ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + ge::AttrUtils::SetBool(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true); + + ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_NOPADDING_CONTINUOUS_INPUT, true); + ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_CONTINUOUS_INPUT_ALLOC, true); + ge::AttrUtils::SetBool(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_REUSE_INPUT, true); + + addn1->GetOpDesc()->SetOutputOffset({100}); + addn2->GetOpDesc()->SetOutputOffset({200}); + concat1->GetOpDesc()->SetOutputOffset({100}); + addn3->GetOpDesc()->SetOutputOffset({700}); + concat2->GetOpDesc()->SetOutputOffset({500}); + + ge::AttrUtils::SetListInt(addn1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); + ge::AttrUtils::SetListInt(addn2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); + ge::AttrUtils::SetListInt(addn3->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {100}); + ge::AttrUtils::SetListInt(concat1->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {200}); + ge::AttrUtils::SetListInt(concat2->GetOpDesc(), ATTR_NAME_OUTPUT_OFFSET_FOR_BUFFER_FUSION, {300}); + + + builder.AddDataEdge(data, 0, addn1, 0); + builder.AddDataEdge(data, 0, addn2, 0); + builder.AddDataEdge(addn1, 0, concat1, 0); + builder.AddDataEdge(addn2, 0, concat1, 1); + builder.AddDataEdge(concat1, 0, concat2, 0); + builder.AddDataEdge(addn3, 0, concat2, 1); + + return builder.GetGraph(); + } + protected: void SetUp() {} @@ -150,7 +199,7 @@ class UtestMemoryAssignerTest : public testing::Test { /* TEST_F(UtestMemoryAssignerTest, MemoryBlock_Resize_RealSizeList_is_empty) { ge::ComputeGraphPtr graph = make_shared(""); - ge::OpDescPtr op_def_a = createOpWithWsSize("A", 6000); + ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000); ge::NodePtr node_a = graph->AddNode(op_def_a); MemoryBlock* memory_block = new MemoryBlock(0); memory_block->Init(1, kOutput, node_a, 0, 1); @@ -178,7 +227,7 @@ class MockBlockMemAssigner : public BlockMemAssigner { // when check GetMemoryRanges return fail, Assign return fail TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { ge::ComputeGraphPtr graph = make_shared(""); - make_graph(graph); + MakeGraph(graph); std::map anchor_to_symbol; std::map> symbol_to_anchors; EXPECT_EQ(GraphUtils::GetRefMapping(graph, symbol_to_anchors, anchor_to_symbol), GRAPH_SUCCESS); @@ -186,3 +235,17 @@ TEST_F(UtestMemoryAssignerTest, Mock_block_mem_assigner_failed) { MockBlockMemAssigner mock_assigner(graph, anchor_to_symbol, symbol_to_anchors); EXPECT_EQ(mock_assigner.Assign(), FAILED); } + +TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) { + ge::ComputeGraphPtr graph = MakeCascadeContinuousMemoryGraph(); + auto addn1 = graph->FindNode("addn1"); + auto addn2 = graph->FindNode("addn2"); + EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 100); + EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 200); + GraphMemoryAssigner memoryAssigner(graph); + MemoryOffset memory_offset(RT_MEMORY_HBM, 0); + memoryAssigner.memory_offset_.emplace(RT_MEMORY_HBM, memory_offset); + EXPECT_EQ(memoryAssigner.ReAssignContinuousMemory(false), GRAPH_SUCCESS); + EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500); + EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600); +}