// graphengine/ge/graph/manager/graph_caching_allocator.cc

/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "graph/manager/graph_caching_allocator.h"
#include <set>
#include <string>
#include <utility>
#include "framework/common/debug/ge_log.h"
#include "graph/manager/graph_mem_allocator.h"
namespace ge {
// Upper size bound (bytes) of each free-block bin, in ascending order; a block
// of size S belongs to the first bin whose bound is >= S (see GetBinIndex).
// Ranges span from one kRoundBlockSize-aligned kilobyte up to one gigabyte.
const size_t bin_ranges[kNumBins] = {kRoundBlockSize * kKByteSize,
                                     kBinSizeUnit8 * kMByteSize,
                                     kBinSizeUnit32 * kMByteSize,
                                     kBinSizeUnit128 * kMByteSize,
                                     kBinSizeUnit256 * kMByteSize,
                                     kBinSizeUnit512 * kMByteSize,
                                     kGByteSize};
// Strict weak ordering for free-block bins: ascending by size, with the
// block's start address breaking ties between equal-sized blocks.
static bool BlockComparator(const Block *left, const Block *right) {
  if (left->size == right->size) {
    return reinterpret_cast<uintptr_t>(left->ptr) < reinterpret_cast<uintptr_t>(right->ptr);
  }
  return left->size < right->size;
}
// A block may take part in coalescing only if it exists, is currently free,
// and was produced by a split (i.e. it has physically adjacent neighbours).
bool CanMerge(Block *block) {
  return (block != nullptr) && !block->allocated && block->IsSplit();
}
// Map a block size to the index of the smallest bin whose range covers it.
// Sizes larger than every range are clamped into the last bin.
size_t GetBinIndex(size_t size) {
  size_t index = 0;
  while ((index < kNumBins) && (size > bin_ranges[index])) {
    ++index;
  }
  return (index < kNumBins) ? index : (kNumBins - 1);
}
// Size actually requested from the device when extending the cache: the
// ceiling of the matching bin when the request fits into one, otherwise the
// request rounded up to a whole number of gigabytes.
size_t GetAllocationSize(size_t size) {
  const size_t bin_size = bin_ranges[GetBinIndex(size)];
  if (size <= bin_size) {
    return bin_size;
  }
  return ((size + kGByteSize - 1) / kGByteSize) * kGByteSize;
}
///
/// @ingroup ge_graph
/// @brief round a requested size up to a whole number of kRoundBlockSize units
/// @param [in] size original malloc size
/// @return aligned allocation size (at least one unit, even when size is 0)
///
size_t GetBlockSize(size_t size) {
  const size_t units = (size == 0) ? 1 : ((size + kRoundBlockSize - 1) / kRoundBlockSize);
  return units * kRoundBlockSize;
}
// Split a candidate block when the request would occupy no more than
// kSplitThreshold of its capacity, so the remainder stays usable.
bool ShouldSplit(const Block *block, size_t size) {
  const double usable = static_cast<double>(block->size) * kSplitThreshold;
  return static_cast<double>(size) <= usable;
}
// Bump the counter kept for `size`, creating the entry (from zero) on first use.
void IncreaseCount(std::map<size_t, size_t> &count, size_t size) {
  // operator[] value-initializes a missing entry to 0, so this covers both
  // the existing-key and the first-occurrence case.
  ++count[size];
}
// Bins are created lazily in Initialize(); start with every slot empty so the
// first Initialize call knows which bins still need to be allocated.
CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) {
  for (uint32_t idx = 0; idx < kNumBins; ++idx) {
    free_block_bins_[idx] = nullptr;
  }
}
// Prepare the allocator for the given device: release any memory held over
// from a previous initialization, make sure a bin exists for every size
// class, and bind the backing device-memory allocator.
// Returns ge::SUCCESS, or an ACL error code on allocation/lookup failure.
Status CachingAllocator::Initialize(uint32_t device_id) {
  GELOGI("Device id %u", device_id);
  // when redo Initialize free old memory
  FreeBlocks();
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  // Bins that survived an earlier Initialize are reused as-is.
  for (uint32_t i = 0; i < kNumBins; ++i) {
    if (free_block_bins_[i] != nullptr) {
      continue;
    }
    auto bin_ptr = new (std::nothrow) BlockBin(BlockComparator);
    if (bin_ptr == nullptr) {
      GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Alloc BlockBin failed.");
      return ACL_ERROR_GE_MEMORY_ALLOCATION;
    }
    free_block_bins_[i] = bin_ptr;
  }
  // Device memory is obtained through the shared MemManager singleton.
  memory_allocator_ = MemManager::Instance(memory_type_);
  if (memory_allocator_ == nullptr) {
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }
  return ge::SUCCESS;
}
// Tear down the allocator. Order matters: statistics are printed first while
// the blocks are still tracked, then all blocks are freed, then the bins
// themselves are destroyed.
void CachingAllocator::Finalize(uint32_t device_id) {
  GELOGI("Device id %u", device_id);
  PrintStatics();
  FreeBlocks();
  FreeBlockBins();
}
// Hand out a cached block of at least `size` bytes (rounded up to block
// alignment). On a cache miss the cache is extended from device memory and
// the lookup retried once. Returns nullptr when no memory can be obtained.
uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) {
  GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id);
  size = GetBlockSize(size);
  Block *block = FindFreeBlock(size, org_ptr, device_id);
  if ((block == nullptr) && (TryExtendCache(size, device_id) == ge::SUCCESS)) {
    // Cache grew; a suitable block should now be available.
    block = FindFreeBlock(size, org_ptr, device_id);
  }
  uint8_t *ptr = (block != nullptr) ? block->ptr : nullptr;
  if (ptr == nullptr) {
    GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size);
  }
  return ptr;
}
// Return a pointer previously handed out by Malloc back to the cache.
// Rejects null pointers and pointers that are not currently allocated.
Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) {
  GELOGI("Free device id = %u", device_id);
  if (ptr == nullptr) {
    GELOGE(PARAM_INVALID, "Invalid memory pointer");
    return ge::PARAM_INVALID;
  }
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  auto iter = allocated_blocks_.find(ptr);
  if (iter == allocated_blocks_.end()) {
    GELOGE(PARAM_INVALID, "Invalid memory pointer");
    return ge::PARAM_INVALID;
  }
  Block *freed = iter->second;
  allocated_blocks_.erase(iter);
  // FreeBlock re-acquires the (recursive) mutex and returns the block to its bin.
  FreeBlock(freed);
  return ge::SUCCESS;
}
// Mark an allocated block free, coalesce it with its physical neighbours
// where possible, and return the (possibly merged) block to its bin.
void CachingAllocator::FreeBlock(Block *block) {
  if ((block == nullptr) || !block->allocated) {
    return;
  }
  GELOGI("Free block size = %zu", block->size);
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  block->allocated = false;
  auto &bin = *block->bin;
  // Capture both neighbours before merging: absorbing prev rewires
  // block->prev, but leaves block->next untouched.
  Block *neighbors[] = {block->prev, block->next};
  for (Block *neighbor : neighbors) {
    MergeBlocks(block, neighbor, bin);
  }
  bin.insert(block);
}
// Absorb `src` into `dst` when both are free split blocks and `src` is dst's
// direct physical neighbour. On success src is erased from the bin and
// deleted; dst grows by src->size and the prev/next chain is rewired to
// bypass src. Statement order matters: links must be fixed before delete.
void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) {
  if (!CanMerge(dst) || !CanMerge(src)) {
    return;
  }
  if (dst->prev == src) {
    // src sits immediately before dst: the merged block starts at src->ptr.
    dst->ptr = src->ptr;
    dst->prev = src->prev;
    if (dst->prev != nullptr) {
      dst->prev->next = dst;
    }
  } else {
    // src sits immediately after dst: dst keeps its start address.
    dst->next = src->next;
    if (dst->next != nullptr) {
      dst->next->prev = dst;
    }
  }
  dst->size += src->size;
  bin.erase(src);
  delete src;
}
// Bin serving allocations of the given size (nullptr only before Initialize).
BlockBin *CachingAllocator::GetBlockBin(size_t size) {
  return free_block_bins_[GetBinIndex(size)];
}
// Look up the smallest cached block that can satisfy `size` (lower_bound on
// the size-ordered bin). A hit is removed from the bin, split when the
// request uses only a small fraction of it (ShouldSplit), then marked
// allocated and recorded in allocated_blocks_. Returns nullptr on miss.
Block *CachingAllocator::FindFreeBlock(size_t size, uint8_t *org_ptr, uint32_t device_id) {
  // Search key; org_ptr breaks ties between equal-sized candidates.
  Block key(device_id, size, org_ptr);
  BlockBin *bin = GetBlockBin(size);
  if (bin == nullptr) {
    GELOGE(ge::FAILED, "Get block bin failed size = %zu", size);
    return nullptr;
  }
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  auto it = bin->lower_bound(&key);
  if (it != bin->end()) {
    Block *block = *it;
    bin->erase(it);
    if (block != nullptr) {
      GELOGI("Find block size = %zu", block->size);
      if (ShouldSplit(block, size)) {
        // SplitBlock returns the leading piece; the remainder goes back to the bin.
        block = SplitBlock(block, size, *bin, device_id);
      }
      if (block->ptr != nullptr) {
        block->allocated = true;
        allocated_blocks_[block->ptr] = block;
        GELOGI("Malloc device id = %u, size= %zu", device_id, size);
      }
    }
    return block;
  }
  return nullptr;
}
// Carve `size` bytes off the front of `block`. The new leading block is
// returned to the caller; the remainder keeps the tail of the buffer, is
// linked as the new block's successor, and is re-inserted into the bin.
// On allocation failure the original block is returned unsplit.
Block *CachingAllocator::SplitBlock(Block *block, size_t size, BlockBin &bin, uint32_t device_id) {
  // block has been checked, should not be nullptr
  Block *remaining = block;
  Block *new_block = new (std::nothrow) Block(device_id, size, &bin, block->ptr);
  if (new_block == nullptr) {
    GELOGE(ge::FAILED, "Alloc block failed size = %zu", size);
    return block;
  }
  // Splice new_block into the neighbour chain just before the remainder.
  new_block->prev = remaining->prev;
  if (new_block->prev != nullptr) {
    new_block->prev->next = new_block;
  }
  new_block->next = remaining;
  remaining->prev = new_block;
  // Remainder now starts `size` bytes further in and shrinks accordingly.
  remaining->ptr = remaining->ptr + size;
  remaining->size -= size;
  bin.insert(remaining);
  return new_block;
}
// Grow the cache with a fresh chunk of device memory sized by
// GetAllocationSize. If the first device malloc fails, unsplit cached blocks
// are released back to the device and the malloc is retried once.
Status CachingAllocator::TryExtendCache(size_t size, uint32_t device_id) {
  GELOGI("Try to extend cache. size = %zu, device id = %u", size, device_id);
  auto memory_size = GetAllocationSize(size);
  const std::string purpose = "Memory for caching.";
  auto memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id);
  // try to free caches and malloc again when malloc memory failed
  if (memory_addr == nullptr) {
    size_t free_cached_memory_size = FreeCachedBlocks();
    memory_addr = memory_allocator_->MallocMemory(purpose, memory_size, device_id);
    if (memory_addr == nullptr) {
      GELOGE(ge::FAILED, "TryExtendCache failed, no enough memory for size = %zu, device_id = %u", memory_size,
             device_id);
      return ge::FAILED;
    }
    GELOGT(TRACE_RUNNING, "Try to free cached memory size:%zu and malloc memory size:%zu success.",
           free_cached_memory_size, memory_size);
  }
  if (AddToBlockBin(memory_addr, memory_size, device_id) != ge::SUCCESS) {
    // Bin bookkeeping failed; hand the chunk straight back to the device.
    (void)memory_allocator_->FreeMemory(memory_addr);
    return ge::FAILED;
  }
  PrintStatics();
  return ge::SUCCESS;
}
// Wrap a freshly malloc'ed device chunk in a Block and publish it in the
// matching bin so subsequent FindFreeBlock calls can hand it out. Also
// records the chunk in the malloced_memory_ per-size counters, which
// FreeCachedBlocks later decrements.
Status CachingAllocator::AddToBlockBin(uint8_t *ptr, size_t size, uint32_t device_id) {
  BlockBin *bin = GetBlockBin(size);
  if (bin == nullptr) {
    GELOGE(ge::FAILED, "Get block bin failed size = %zu", size);
    return ge::FAILED;
  }
  Block *block = new (std::nothrow) Block(device_id, size, bin, nullptr);
  if (block == nullptr) {
    GELOGE(ge::FAILED, "Alloc block failed size = %zu", size);
    return ge::FAILED;
  }
  GELOGI("Block size = %zu", size);
  block->ptr = ptr;
  block->size = size;
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  IncreaseCount(malloced_memory_, block->size);
  bin->insert(block);
  return ge::SUCCESS;
}
// Release cached, currently-unused memory back to the device. Only blocks
// that were never split (no prev/next neighbours) are freed, since split
// blocks do not own the start of their device allocation. Returns the total
// number of bytes handed back.
size_t CachingAllocator::FreeCachedBlocks() {
  GELOGI("Free cached blocks");
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  size_t free_cached_memory_size = 0;
  for (uint32_t i = 0; i < kNumBins; ++i) {
    auto pool = free_block_bins_[i];
    if (pool == nullptr) {
      continue;
    }
    for (auto it = pool->begin(); it != pool->end();) {
      Block *block = *it;
      // free block memory that has not been split
      if ((block != nullptr) && (block->ptr != nullptr) && (block->prev == nullptr) && (block->next == nullptr) &&
          (memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) {
        // Undo the per-size accounting done in AddToBlockBin.
        auto itcount = malloced_memory_.find(block->size);
        free_cached_memory_size += block->size;
        if (itcount != malloced_memory_.end()) {
          itcount->second--;
          if (itcount->second == 0) {
            malloced_memory_.erase(itcount);
          }
        }
        // Post-increment before erase: `it` is invalidated by the erase.
        pool->erase(it++);
        delete block;
        continue;
      }
      ++it;
    }
  }
  return free_cached_memory_size;
}
// Reclaim everything: push every outstanding allocation back into the cache,
// then release the cache's unsplit chunks to the device.
void CachingAllocator::FreeBlocks() {
  GELOGI("Free blocks");
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  // free allocated blocks and put to cache
  for (auto &entry : allocated_blocks_) {
    FreeBlock(entry.second);
  }
  allocated_blocks_.clear();
  (void) FreeCachedBlocks();
}
// Destroy the bin containers themselves (their blocks must already be freed).
void CachingAllocator::FreeBlockBins() {
  GELOGI("Free block bins");
  std::lock_guard<std::recursive_mutex> lock(mutex_);
  for (uint32_t idx = 0; idx < kNumBins; ++idx) {
    delete free_block_bins_[idx];  // deleting nullptr is a harmless no-op
    free_block_bins_[idx] = nullptr;
  }
}
// Print a per-block-size histogram together with its totals at info level.
// Fix: the histogram and name are read-only here, so take them by const
// reference (the original non-const refs forbade const callers and implied
// mutation); iterate by const reference to avoid copying map entries.
// @param count       per-size counters to dump
// @param name        label for the histogram (e.g. "Malloc", "Using", "Free")
// @param total_size  sum of all bytes covered by the histogram
// @param total_count total number of blocks covered
void PrintCount(const std::map<size_t, size_t> &count, const std::string &name, size_t total_size,
                size_t total_count) {
  GELOGI("%6s total[size:%10zu count:%10zu]", name.c_str(), total_size, total_count);
  for (const auto &it : count) {
    GELOGI(" |- block[size:%10zu count:%10zu]", it.first, it.second);
  }
}
void CachingAllocator::PrintStatics() {
if (!IsLogEnable(GE_MODULE_NAME, DLOG_INFO)) {
return;
}
size_t total_using_size = 0;
size_t total_using_count = 0;
size_t total_free_size = 0;
size_t total_free_count = 0;
size_t total_malloc_size = 0;
size_t total_malloc_count = 0;
std::map<size_t, size_t> using_block;
std::map<size_t, size_t> free_block;
std::map<size_t, size_t> malloc_block;
do {
std::lock_guard<std::recursive_mutex> lock(mutex_);
for (uint32_t i = 0; i < kNumBins; ++i) {
auto pool = free_block_bins_[i];
if (pool == nullptr) {
continue;
}
for (auto it = pool->begin(); it != pool->end(); ++it) {
if ((*it) != nullptr) {
total_free_size += (*it)->size;
IncreaseCount(free_block, (*it)->size);
total_free_count++;
}
}
}
for (auto &it : allocated_blocks_) {
if (it.second != nullptr) {
total_using_size += it.second->size;
IncreaseCount(using_block, it.second->size);
total_using_count++;
}
}
for (auto &it : malloced_memory_) {
total_malloc_size += it.first * it.second;
total_malloc_count += it.second;
malloc_block[it.first] = it.second;
}
} while (0);
PrintCount(malloc_block, "Malloc", total_malloc_size, total_malloc_count);
PrintCount(using_block, "Using", total_using_size, total_using_count);
PrintCount(free_block, "Free", total_free_size, total_free_count);
}
} // namespace ge