|
|
|
@ -21,22 +21,32 @@
|
|
|
|
|
namespace mindspore {
|
|
|
|
|
namespace device {
|
|
|
|
|
namespace ascend {
|
|
|
|
|
// Default size, in GB, of the main device memory block requested when the
// context does not supply a size.
constexpr uint64_t kAscendDeviceMemGB = 26;
// Default size, in GB, of the separately allocated memory pool.
constexpr uint64_t kAscendMemPoolGB = 4;
// Despite its name this is a shift amount, not a size: a value in GB is
// shifted left by 30 bits to obtain bytes (1 GB == 2^30 bytes).
constexpr uint64_t kMemSizeGB = 30;
// Upper bound, in GB, accepted for a size configured through the context.
constexpr uint64_t kMaxMemSizeGB = 30;
// Byte sizes derived from the GB defaults above.
constexpr uint64_t kAscendDeviceMemSize = (kAscendDeviceMemGB << kMemSizeGB);
constexpr uint64_t kAscendMemPoolSize = (kAscendMemPoolGB << kMemSizeGB);
|
|
|
|
|
|
|
|
|
|
void AscendMemoryManager::MallocDeviceMemory() {
|
|
|
|
|
auto context_mem = GetDeviceMemSizeFromContext();
|
|
|
|
|
device_mem_size_ = context_mem == 0 ? kAscendDeviceMemSize : context_mem;
|
|
|
|
|
dynamic_mem_offset_ = device_mem_size_;
|
|
|
|
|
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), dynamic_mem_offset_, RT_MEMORY_HBM);
|
|
|
|
|
static_mem_offset_ = device_mem_size_;
|
|
|
|
|
auto ret = rtMalloc(reinterpret_cast<void **>(&device_mem_base_), static_mem_offset_, RT_MEMORY_HBM);
|
|
|
|
|
|
|
|
|
|
if (ret != RT_ERROR_NONE) {
|
|
|
|
|
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << dynamic_mem_offset_ << "] fail, ret[" << ret << "]";
|
|
|
|
|
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << static_mem_offset_ << "] fail, ret[" << ret << "]";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_base_);
|
|
|
|
|
AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_);
|
|
|
|
|
if (context_mem == 0) {
|
|
|
|
|
device_mem_pool_size_ = kAscendMemPoolSize;
|
|
|
|
|
ret = rtMalloc(reinterpret_cast<void **>(&device_mem_pool_base_), device_mem_pool_size_, RT_MEMORY_HBM);
|
|
|
|
|
if (ret != RT_ERROR_NONE) {
|
|
|
|
|
MS_EXCEPTION(DeviceProcessError) << "rtMalloc mem size[" << device_mem_pool_size_ << "] fail, ret[" << ret << "]";
|
|
|
|
|
}
|
|
|
|
|
AscendMemoryPool::GetInstance().set_device_mem_pool_base(device_mem_pool_base_);
|
|
|
|
|
AscendMemoryPool::GetInstance().set_device_mem_pool_size(device_mem_pool_size_);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() {
|
|
|
|
@ -54,7 +64,7 @@ uint64_t AscendMemoryManager::GetDeviceMemSizeFromContext() {
|
|
|
|
|
auto gb_str = variable_memory_max_size.substr(0, pos);
|
|
|
|
|
auto gb_var = std::stoull(gb_str);
|
|
|
|
|
MS_LOG(INFO) << "variable_memory_max_size(GB):" << gb_var;
|
|
|
|
|
if (gb_var > kAscendDeviceMemGB || gb_var == 0) {
|
|
|
|
|
if (gb_var > kMaxMemSizeGB || gb_var == 0) {
|
|
|
|
|
MS_LOG(EXCEPTION) << "Invalid allocate memory size:" << gb_var << " which should be in (0-30]GB";
|
|
|
|
|
}
|
|
|
|
|
return gb_var << kMemSizeGB;
|
|
|
|
@ -77,71 +87,8 @@ void AscendMemoryManager::FreeDeviceMemory() {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Discards all dynamic allocations: clears the running dynamic total, moves
// the dynamic offset back to the end of the device block, and publishes the
// restored offset to AscendMemoryPool.
void AscendMemoryManager::ResetDynamicMemory() {
  total_dynamic_size_ = 0;
  dynamic_mem_offset_ = device_mem_size_;

  auto &&mem_pool = AscendMemoryPool::GetInstance();
  mem_pool.set_graph_dynamic_mem_offset(dynamic_mem_offset_);
}
|
|
|
|
|
|
|
|
|
|
void *AscendMemoryManager::MallocMemFromMemPool(size_t size) {
|
|
|
|
|
auto align_size = GetCommonAlignSize(size);
|
|
|
|
|
return AscendMemoryPool::GetInstance().AllocTensorMem(align_size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_mem) {
|
|
|
|
|
size_t align_size = 0;
|
|
|
|
|
if (communication_mem) {
|
|
|
|
|
align_size = GetCommunicationAlignSize(size);
|
|
|
|
|
} else {
|
|
|
|
|
align_size = GetCommonAlignSize(size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
|
|
|
|
|
MS_LOG(INFO) << "Malloc Memory: Static, total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
|
|
|
|
|
<< "] memory pool[" << device_mem_pool_offset << "])"
|
|
|
|
|
<< " malloc [" << align_size << "]";
|
|
|
|
|
|
|
|
|
|
if (communication_mem) {
|
|
|
|
|
// create protect area [kMemAlignSize -- data -- kMemAlignSize]
|
|
|
|
|
uint8_t *alloc_address = reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
|
|
|
|
return alloc_address + kMemAlignSize;
|
|
|
|
|
} else {
|
|
|
|
|
return reinterpret_cast<uint8_t *>(AscendMemoryPool::GetInstance().AllocTensorMem(align_size));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t *AscendMemoryManager::MallocDynamicMem(size_t size, bool communication_mem) {
|
|
|
|
|
size_t align_size = 0;
|
|
|
|
|
if (communication_mem) {
|
|
|
|
|
align_size = GetCommunicationAlignSize(size);
|
|
|
|
|
} else {
|
|
|
|
|
align_size = GetCommonAlignSize(size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto device_mem_pool_offset = AscendMemoryPool::GetInstance().device_mem_pool_offset();
|
|
|
|
|
MS_LOG(INFO) << "Malloc Memory: Dynamic, total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
|
|
|
|
|
<< "] memory pool[" << device_mem_pool_offset << "])"
|
|
|
|
|
<< " malloc [" << align_size << "]";
|
|
|
|
|
|
|
|
|
|
if (dynamic_mem_offset_ < align_size) {
|
|
|
|
|
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
|
|
|
|
|
<< "]) malloc [" << align_size << "] failed!";
|
|
|
|
|
}
|
|
|
|
|
auto new_offset = dynamic_mem_offset_ - align_size;
|
|
|
|
|
if (new_offset <= device_mem_pool_offset) {
|
|
|
|
|
MS_LOG(EXCEPTION) << "Out of memory!!! total[" << device_mem_size_ << "] (dynamic[" << total_dynamic_size_
|
|
|
|
|
<< "] memory pool[" << device_mem_pool_offset << "])"
|
|
|
|
|
<< " malloc [" << align_size << "] failed!";
|
|
|
|
|
}
|
|
|
|
|
total_dynamic_size_ += align_size;
|
|
|
|
|
dynamic_mem_offset_ = new_offset;
|
|
|
|
|
AscendMemoryPool::GetInstance().set_graph_dynamic_mem_offset(dynamic_mem_offset_);
|
|
|
|
|
if (communication_mem) {
|
|
|
|
|
// create protect area [kMemAlignSize -- data -- kMemAlignSize]
|
|
|
|
|
return device_mem_base_ + new_offset + kMemAlignSize;
|
|
|
|
|
} else {
|
|
|
|
|
return device_mem_base_ + new_offset;
|
|
|
|
|
}
|
|
|
|
|
return AscendMemoryPool::GetInstance().AllocTensorMem(size);
|
|
|
|
|
}
|
|
|
|
|
} // namespace ascend
|
|
|
|
|
} // namespace device
|
|
|
|
|