diff --git a/ge/graph/manager/graph_caching_allocator.cc b/ge/graph/manager/graph_caching_allocator.cc index bfef4001..ca5a6c7d 100644 --- a/ge/graph/manager/graph_caching_allocator.cc +++ b/ge/graph/manager/graph_caching_allocator.cc @@ -28,10 +28,9 @@ const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize, kBinSizeUnit8 * kMByteSize, kBinSizeUnit32 * kMByteSize, kBinSizeUnit128 * kMByteSize, - kGByteSize, - kBinSizeUnit4 * kGByteSize, - kBinSizeUnit16 * kGByteSize, - kBinSizeUnit26 * kGByteSize}; + kBinSizeUnit256 * kMByteSize, + kBinSizeUnit512 * kMByteSize, + kGByteSize}; static bool BlockComparator(const Block *left, const Block *right) { if (left->size != right->size) { @@ -63,7 +62,10 @@ size_t GetBinIndex(size_t size) { size_t GetAllocationSize(size_t size) { size_t index = GetBinIndex(size); - return bin_ranges[index]; + if (bin_ranges[index] >= size) { + return bin_ranges[index]; + } + return kGByteSize * ((size + kGByteSize - 1) / kGByteSize); } /// @@ -119,6 +121,7 @@ void CachingAllocator::Finalize(uint32_t device_id) { } uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) { + GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id); uint8_t *ptr = nullptr; size = GetBlockSize(size); Block *block = FindFreeBlock(size, org_ptr, device_id); @@ -253,6 +256,7 @@ Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, ui } Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) { + GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id); auto memory_size = GetAllocationSize(size); const std::string purpose = "Memory for caching."; auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id); diff --git a/ge/graph/manager/graph_caching_allocator.h b/ge/graph/manager/graph_caching_allocator.h index e024d5cd..42d0952d 100644 --- a/ge/graph/manager/graph_caching_allocator.h +++ b/ge/graph/manager/graph_caching_allocator.h @@ -36,17 +36,17 @@ namespace ge { constexpr size_t kRoundBlockSize = 512; // all block sizes are rounded to at least 512 bytes constexpr size_t kBinSizeUnit4 = 4; constexpr size_t kBinSizeUnit8 = 8; -constexpr size_t kBinSizeUnit16 = 16; -constexpr size_t kBinSizeUnit26 = 26; constexpr size_t kBinSizeUnit32 = 32; constexpr size_t kBinSizeUnit128 = 128; +constexpr size_t kBinSizeUnit256 = 256; +constexpr size_t kBinSizeUnit512 = 512; -constexpr double kSplitThreshold = 0.75; // split when malloc size <= small block size * kSpliThreshold +constexpr double kSplitThreshold = 0.5; // split when malloc size <= small block size * kSpliThreshold constexpr size_t kKByteSize = 1024; constexpr size_t kMByteSize = 1048576; // 1024 * 1024 constexpr size_t kGByteSize = 1073741824; // 1024 * 1024 * 1024 -static const uint32_t kNumBins = 8; +static const uint32_t kNumBins = 7; class MemoryAllocator; diff --git a/ge/graph/passes/flow_ctrl_pass.cc b/ge/graph/passes/flow_ctrl_pass.cc index 9d441ed5..52a570cb 100755 --- a/ge/graph/passes/flow_ctrl_pass.cc +++ b/ge/graph/passes/flow_ctrl_pass.cc @@ -37,7 +37,7 @@ Status FlowCtrlPass::Run(ComputeGraphPtr compute_graph) { return NOT_CHANGED; } - GELOGI("FlowCtrl pass begin"); + GELOGI("FlowCtrl pass begin.graph is [%s]", compute_graph->GetName().c_str()); bool graph_change = false; // 1. Add FP/BP flow ctrl (big cycle) for (auto &node : compute_graph->GetDirectNode()) { diff --git a/ge/graph/passes/net_output_pass.cc b/ge/graph/passes/net_output_pass.cc index c6ab062a..c553607f 100644 --- a/ge/graph/passes/net_output_pass.cc +++ b/ge/graph/passes/net_output_pass.cc @@ -458,7 +458,7 @@ Status NetOutputPass::Run(ge::ComputeGraphPtr graph) { GELOGE(GE_GRAPH_PARAM_NULLPTR, "Compute graph is null."); return GE_GRAPH_PARAM_NULLPTR; } - GELOGI("NetOutputPass Run."); + GELOGI("NetOutputPass Run.graph is [%s]", graph->GetName().c_str()); NodePtr output_node = graph->FindFirstNodeMatchType(NETOUTPUT); // save user targets node SaveAndRemoveTargets(graph); diff --git a/ge/graph/passes/prune_pass.cc b/ge/graph/passes/prune_pass.cc index f5f4cbcb..3c95f3b1 100644 --- a/ge/graph/passes/prune_pass.cc +++ b/ge/graph/passes/prune_pass.cc @@ -27,12 +27,11 @@ namespace ge { Status PrunePass::Run(ge::ComputeGraphPtr graph) { - GELOGD("PrunePass Start"); + GELOGD("PrunePass Start, graph is [%s]", graph->GetName().c_str()); if (graph == nullptr) { GELOGE(GE_GRAPH_ISNULL, "input compute graph is NULL."); return GE_GRAPH_ISNULL; } - std::vector out_nodes; std::unordered_set nodes; for (NodePtr &node_ptr : graph->GetDirectNode()) { @@ -42,7 +41,6 @@ Status PrunePass::Run(ge::ComputeGraphPtr graph) { out_nodes.push_back(node_ptr); } } - if (out_nodes.empty()) { GELOGW("graph [%s] does not contain NETOUTPUT type node,no return value. Do nothing!", graph->GetName().c_str()); return ge::SUCCESS; diff --git a/ge/hybrid/executor/worker/execution_engine.cc b/ge/hybrid/executor/worker/execution_engine.cc index 7f4fa78c..fda65cb2 100755 --- a/ge/hybrid/executor/worker/execution_engine.cc +++ b/ge/hybrid/executor/worker/execution_engine.cc @@ -323,6 +323,8 @@ Status NodeDoneCallback::OnNodeDone() { node_item.NodeName().c_str()); } + // release workspace + context_->ReleaseWorkspace(); // release inputs for (int i = 0; i < context_->NumInputs(); ++i) { context_->ReleaseInput(i); diff --git a/ge/hybrid/node_executor/task_context.cc b/ge/hybrid/node_executor/task_context.cc index e3cf5ae1..085970e0 100644 --- a/ge/hybrid/node_executor/task_context.cc +++ b/ge/hybrid/node_executor/task_context.cc @@ -36,10 +36,6 @@ TaskContext::TaskContext(GraphExecutionContext *execution_context, TaskContext::~TaskContext() { GELOGD("[%s] TaskContext destroyed.", node_item_->NodeName().c_str()); - for (auto ws_addr : workspaces_) { - execution_context_->allocator->Deallocate(ws_addr); - } - // release output for (int i = 0; i < NumOutputs(); ++i) { auto output_tensor = MutableOutput(i); @@ -49,6 +45,13 @@ TaskContext::~TaskContext() { } } +void TaskContext::ReleaseWorkspace() { + GELOGD("[%s] Start ReleaseWorkspace.", node_item_->NodeName().c_str()); + for (auto ws_addr : workspaces_) { + execution_context_->allocator->Deallocate(ws_addr); + } +} + std::unique_ptr TaskContext::Create(NodeState *node_state, GraphExecutionContext *execution_context, SubgraphContext *subgraph_context) { diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h index dc4ff058..f29918b4 100644 --- a/ge/hybrid/node_executor/task_context.h +++ b/ge/hybrid/node_executor/task_context.h @@ -56,6 +56,7 @@ class TaskContext { void ReleaseInputsAndOutputs(); bool NeedCallback(); void ReleaseInput(int index); + void ReleaseWorkspace(); const TensorValue *GetInput(int index) const; const TensorValue *GetOutput(int index) const; TensorValue *MutableOutput(int index); diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index cfaf563e..688e393c 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -752,6 +752,7 @@ set(MULTI_PARTS_TEST_FILES "graph/build/mem_assigner_unittest.cc" "graph/preprocess/graph_preprocess_unittest.cc" "graph/manager/hcom_util_unittest.cc" + "graph/manager/graph_caching_allocator_unittest.cc" "session/omg_omg_unittest.cc" ) diff --git a/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc new file mode 100644 index 00000000..f76a4d4e --- /dev/null +++ b/tests/ut/ge/graph/manager/graph_caching_allocator_unittest.cc @@ -0,0 +1,87 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "graph/anchor.h" +#include "graph/attr_value.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/utils/graph_utils.h" +#include "graph/utils/node_utils.h" +#include "graph/utils/op_desc_utils.h" +#include "graph/utils/tensor_utils.h" +#include "omg/omg_inner_types.h" + +#define protected public +#define private public +#include "graph/manager/graph_caching_allocator.h" +#include "graph/manager/graph_mem_allocator.h" +#undef protected +#undef private + +using namespace std; +using namespace testing; +using namespace ge; +using domi::GetContext; + +class UtestGraphCachingAllocatorTest : public testing::Test { + protected: + void SetUp() {} + + void TearDown() { GetContext().out_nodes_map.clear(); } +}; + +TEST_F(UtestGraphCachingAllocatorTest, initialize_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestGraphCachingAllocatorTest, malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestGraphCachingAllocatorTest, extend_malloc_success) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kBinSizeUnit32*kMByteSize); + EXPECT_NE(nullptr, ptr); + MemManager::Instance().Finalize(); +} + +TEST_F(UtestGraphCachingAllocatorTest, malloc_statics) { + std::vector mem_type; + mem_type.push_back(RT_MEMORY_HBM); + EXPECT_EQ(MemManager::Instance().Initialize(mem_type), SUCCESS); + uint8_t *ptr = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kMByteSize); + EXPECT_NE(nullptr, ptr); + uint8_t *ptr1 = MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Malloc(kKByteSize); + EXPECT_NE(nullptr, ptr); + EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr), SUCCESS); + EXPECT_EQ(MemManager::Instance().CachingInstance(RT_MEMORY_HBM).Free(ptr1), SUCCESS); + MemManager::Instance().CachingInstance(RT_MEMORY_HBM).FreeCachedBlocks(); + MemManager::Instance().Finalize(); +} \ No newline at end of file