From e9345e6a99ec02b8990344829cbda7a47feaced2 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 20:00:42 +0800 Subject: [PATCH 1/4] fix out of memory question --- ge/graph/manager/graph_caching_allocator.cc | 12 +-- ge/graph/manager/graph_caching_allocator.h | 8 +- ge/hybrid/executor/worker/execution_engine.cc | 2 + ge/hybrid/node_executor/task_context.cc | 11 ++- ge/hybrid/node_executor/task_context.h | 1 + tests/ut/ge/CMakeLists.txt | 1 + .../graph_caching_allocator_unittest.cc | 76 +++++++++++++++++++ 7 files changed, 98 insertions(+), 13 deletions(-) create mode 100644 tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index bfef4001..dd46e670 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -28,10 +28,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, kBinSizeUnit8 * kMByteSize, kBinSizeUnit32 * kMByteSize, kBinSizeUnit128 * kMByteSize, - kGByteSize, - kBinSizeUnit4 * kGByteSize, - kBinSizeUnit16 * kGByteSize, - kBinSizeUnit26 * kGByteSize}; + kBinSizeUnit256 * kMByteSize, + kBinSizeUnit512 * kMByteSize, + kGByteSize}; static bool BlockComparator(const Block *left, const Block *right) { if (left->size != right->size) { @@ -63,7 +62,10 @@ size_t GetBinIndex(size_t size) { size_t GetAllocationSize(size_t size) { size_t index = GetBinIndex(size); - return bin_ranges[index]; + if (bin_ranges[index] >= size) { + return bin_ranges[index]; + } + return kGByteSize * ((size + kGByteSize - 1) / kGByteSize); } /// diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index e024d5cd..42d0952d 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -36,17 +36,17 @@ namespace ge { constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes constexpr size_t kBinSizeUnit4 = 4; constexpr size_t kBinSizeUnit8 = 8; -constexpr size_t kBinSizeUnit16 = 16; -constexpr size_t kBinSizeUnit26 = 26; constexpr size_t kBinSizeUnit32 = 32; constexpr size_t kBinSizeUnit128 = 128; +constexpr size_t kBinSizeUnit256 = 256; +constexpr size_t kBinSizeUnit512 = 512; -constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold +constexpr double kSplitThreshold = 0.5; // split when malloc size <= small block size * kSpliThreshold constexpr size_t kKByteSize = 1024; constexpr size_t kMByteSize = 1048576; // 1024 * 1024 constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 -static const uint32_t kNumBins = 8; +static const uint32_t kNumBins = 7; class MemoryAllocator; diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 7f4fa78c..fda65cb2 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -323,6 +323,8 @@ Status NodeDoneCallback::OnNodeDone() { node_item.NodeName().c_str()); } + // release workspace + context_->ReleaseWorkspace(); // release inputs for (int i = 0; i < context_->NumInputs(); ++i) { context_->ReleaseInput(i); diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index e3cf5ae1..085970e0 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -36,10 +36,6 @@ TaskContext::TaskContext(GraphExecutionContext *execution_context, TaskContext::~TaskContext() { GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); - for (auto ws_addr : workspaces_) { - execution_context_->allocator->Deallocate(ws_addr); - } - // release output for (int i = 0; i < NumOutputs(); ++i) { auto output_tensor = MutableOutput(i); @@ -49,6 +45,13 @@ TaskContext::~TaskContext() { } } +void TaskContext::ReleaseWorkspace() { + GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str()); + for (auto ws_addr : workspaces_) { + execution_context_->allocator->Deallocate(ws_addr); + } +} + std::unique_ptr TaskContext::Create(NodeState *node_state, GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) { diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index dc4ff058..f29918b4 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -56,6 +56,7 @@ class TaskContext { void ReleaseInputsAndOutputs(); bool NeedCallback(); void ReleaseInput(int index); + void ReleaseWorkspace(); const TensorValue *GetInput(int index) const; const TensorValue *GetOutput(int index) const; TensorValue *MutableOutput(int index); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 184403ed..0247a694 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -752,6 +752,7 @@ set(MULTI_PARTS_TEST_FILES "graph/build/mem_assigner_unittest.cc" "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" + "graph/manager/graph_caching_allocator_unittest.cc" "session/omg_omg_unittest.cc" ) diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc new file mode 100644 index 00000000..efadcdf6 --- /dev/null +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_allocator.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestGraphCachingAllocatorTest : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestGraphCachingAllocatorTest, initialize_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestGraphCachingAllocatorTest, malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + uint8_t *ptr1 = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kKByteSize); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr), SUCCESS); + EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr1), SUCCESS); + MemManager::Instance().CachingInstance(RT_MEMORY_HBM).FreeCachedBlocks(); + MemManager::Instance().Finalize(); +} \ No newline at end of file From 6eb421f8904a2237ffc3609a44681f2d60810ea0 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 21:00:30 +0800 Subject: [PATCH 2/4] bugfix:fix print shape abnormal question --- ge/graph/manager/graph_caching_allocator.cc | 2 ++ .../graph/manager/graph_caching_allocator_unittest.cc | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index dd46e670..ca5a6c7d 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -121,6 +121,7 @@ void CachingAllocator::Finalize(uint32_t device_id) { } uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { + GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); uint8_t *ptr = nullptr; size = GetBlockSize(size); Block *block = FindFreeBlock(size, org_ptr, device_id); @@ -255,6 +256,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui } Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { + GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id); auto memory_size = GetAllocationSize(size); const std::string purpose = "Memory for caching."; auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc index efadcdf6..f76a4d4e 100644 --- a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -61,6 +61,17 @@ TEST_F(UtestGraphCachingAllocatorTest, malloc_success) { MemManager::Instance().Finalize(); } +TEST_F(UtestGraphCachingAllocatorTest, extend_malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kBinSizeUnit32*kMByteSize); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) { std::vector mem_type; mem_type.push_back(RT_MEMORY_HBM); From d66ef5f2d0e1c93d3b8f2f078450020d56856508 Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 22:07:26 +0800 Subject: [PATCH 3/4] fix out of memory question --- ge/graph/passes/net_output_pass.cc | 2 +- ge/graph/passes/prune_pass.cc | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc index c6ab062a..c553607f 100644 --- a/ge/graph/passes/net_output_pass.cc +++ b/ge/graph/passes/net_output_pass.cc @@ -458,7 +458,7 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); return GE_GRAPH_PARAM_NULLPTR; } - GELOGI("NetOutputPass Run."); + GELOGI("NetOutputPass Run.graph is [%s]", graph->GetName().c_str()); NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); // save user targets node SaveAndRemoveTargets(graph); diff --git a/ge/graph/passes/prune_pass.cc b/ge/graph/passes/prune_pass.cc index f5f4cbcb..3c95f3b1 100644 --- a/ge/graph/passes/prune_pass.cc +++ b/ge/graph/passes/prune_pass.cc @@ -27,12 +27,11 @@ namespace ge { Status PrunePass::Run(ge::ComputeGraphPtr graph) { - GELOGD("PrunePass Start"); + GELOGD("PrunePass Start, graph is [%s]", graph->GetName().c_str()); if (graph == nullptr) { GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL."); return GE_GRAPH_ISNULL; } - std::vector out_nodes; std::unordered_set nodes; for (NodePtr &node_ptr : graph->GetDirectNode()) { @@ -42,7 +41,6 @@ Status PrunePass::Run(ge::ComputeGraphPtr graph) { out_nodes.push_back(node_ptr); } } - if (out_nodes.empty()) { GELOGW("graph [%s] does not contain NETOUTPUT type node,no return value. Do nothing!", graph->GetName().c_str()); return ge::SUCCESS; From 98995cbae1ef36888883e5ac198e870b229cf4ff Mon Sep 17 00:00:00 2001 From: wxl Date: Tue, 23 Feb 2021 22:14:25 +0800 Subject: [PATCH 4/4] fix out of memory question --- ge/graph/passes/flow_ctrl_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index 9d441ed5..52a570cb 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -37,7 +37,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { return NOT_CHANGED; } - GELOGI("FlowCtrl pass begin"); + GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str()); bool graph_change = false; // 1. Add FP/BP flow ctrl (big cycle) for (auto &node : compute_graph->GetDirectNode()) {