From 63f3a2caacf4263d0b33e9f7a15ef1ff3a966ea5 Mon Sep 17 00:00:00 2001 From: limingqi107 Date: Wed, 29 Apr 2020 10:38:08 +0800 Subject: [PATCH] gpu optimize some return values of dynamic memory pool --- .../ccsrc/device/gpu/gpu_kernel_runtime.cc | 22 ++++++++++++------- .../ccsrc/device/gpu/gpu_memory_manager.cc | 2 +- mindspore/ccsrc/device/kernel_runtime.cc | 15 ++++++++++--- mindspore/ccsrc/device/memory_manager.cc | 13 ++++++++--- mindspore/ccsrc/device/memory_manager.h | 4 ++-- .../mem_reuse/mem_dynamic_allocator.cc | 22 +++++++++++-------- 6 files changed, 52 insertions(+), 26 deletions(-) diff --git a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc index 2d53097dd8..17817ebeba 100644 --- a/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/device/gpu/gpu_kernel_runtime.cc @@ -225,23 +225,24 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod MS_EXCEPTION_IF_NULL(input); input->addr = device_address->ptr_; input->size = device_address->size_; - kernel_inputs->push_back(input); + kernel_inputs->emplace_back(input); } - auto output_sizes = kernel_mod.GetOutputSizeList(); for (size_t i = 0; i < output_sizes.size(); ++i) { auto device_address = AnfAlgo::GetMutableOutputAddr(kernel, i); MS_EXCEPTION_IF_NULL(device_address); if (device_address->ptr_ == nullptr) { - mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } } kernel::AddressPtr output = std::make_shared(); MS_EXCEPTION_IF_NULL(output); output->addr = device_address->ptr_; output->size = output_sizes[i]; - kernel_outputs->push_back(output); + kernel_outputs->emplace_back(output); } - auto workspace_sizes = kernel_mod.GetWorkspaceSizeList(); for (size_t i = 0; i < workspace_sizes.size(); ++i) { if (workspace_sizes[i] == 0) { @@ -249,12 +250,14 @@ void GPUKernelRuntime::AllocKernelDynamicRes(const mindspore::kernel::KernelMod continue; } auto device_ptr = mem_manager_->MallocMemFromMemPool(workspace_sizes[i]); - MS_EXCEPTION_IF_NULL(device_ptr); + if (!device_ptr) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } kernel::AddressPtr workspace = std::make_shared(); MS_EXCEPTION_IF_NULL(workspace); workspace->addr = device_ptr; workspace->size = workspace_sizes[i]; - kernel_workspaces->push_back(workspace); + kernel_workspaces->emplace_back(workspace); } } @@ -334,7 +337,10 @@ void GPUKernelRuntime::AllocCommunicationOpMemory(bool is_need_alloc_memory, boo } } } - mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); + auto ret = mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } } void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, diff --git a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc index 6e81130b9c..9a63921add 100644 --- a/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc +++ b/mindspore/ccsrc/device/gpu/gpu_memory_manager.cc @@ -40,7 +40,7 @@ void GPUMemoryManager::MallocDeviceMemory() { if (context_ptr->enable_dynamic_mem_pool()) { auto device_addr = MallocMemFromMemPool(1); if (!device_addr) { - MS_LOG(ERROR) << "Dynamic memory pool init error."; + MS_LOG(EXCEPTION) << "Dynamic memory pool init error."; } } else { // Need to reserve 20% space for dynamic memory diff --git a/mindspore/ccsrc/device/kernel_runtime.cc b/mindspore/ccsrc/device/kernel_runtime.cc index d1a068b584..d3fccc11fd 100644 --- a/mindspore/ccsrc/device/kernel_runtime.cc +++ b/mindspore/ccsrc/device/kernel_runtime.cc @@ -180,7 +180,10 @@ void KernelRuntime::RunOpAssignInputMemory(const std::vector auto device_address = CreateDeviceAddress(nullptr, tensor_size, AnfAlgo::GetOutputFormat(item, index), output_type_id); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocMemFromMemPool(device_address, tensor_size); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, tensor_size); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } AnfAlgo::SetOutputAddr(device_address, index, item.get()); } } @@ -209,7 +212,10 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) { auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i); auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_sizes[i]); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } AnfAlgo::SetOutputAddr(device_address, i, kernel.get()); } } @@ -224,7 +230,10 @@ void KernelRuntime::RunOpAssignWorkSpaceMemory(const AnfNodePtr &kernel) { for (size_t i = 0; i < workspace_lists.size(); ++i) { auto device_address = CreateDeviceAddress(nullptr, workspace_lists[i], "", kTypeUnknown); MS_EXCEPTION_IF_NULL(device_address); - mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); + auto ret = mem_manager_->MallocMemFromMemPool(device_address, workspace_lists[i]); + if (!ret) { + MS_LOG(EXCEPTION) << "Malloc device memory failed."; + } AnfAlgo::SetWorkspaceAddr(device_address, i, kernel.get()); } } diff --git a/mindspore/ccsrc/device/memory_manager.cc b/mindspore/ccsrc/device/memory_manager.cc index 8dd8dfb5e0..d2a38038c6 100644 --- a/mindspore/ccsrc/device/memory_manager.cc +++ b/mindspore/ccsrc/device/memory_manager.cc @@ -141,11 +141,14 @@ uint8_t *MemoryManager::MallocDynamicMem(size_t size, bool communication_mem) { } } -void MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { +bool MemoryManager::MallocMemFromMemPool(const DeviceAddressPtr address, size_t size) { auto device_ptr = MallocMemFromMemPool(size); - MS_EXCEPTION_IF_NULL(device_ptr); + if (!device_ptr) { + return false; + } address->ptr_ = device_ptr; address->from_mem_pool_ = true; + return true; } void *MemoryManager::MallocMemFromMemPool(size_t size) { @@ -168,9 +171,12 @@ void MemoryManager::FreeMemFromMemPool(void *device_ptr) { } } -void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, +bool MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, std::vector size_list) { auto device_ptr_list = MallocContinuousMemFromMemPool(total_size, size_list); + if (device_ptr_list.size() == 0) { + return false; + } if (addr_list.size() != device_ptr_list.size()) { MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list."; } @@ -180,6 +186,7 @@ void MemoryManager::MallocContinuousMemFromMemPool(const DeviceAddressPtrList ad addr_list[i]->ptr_ = device_ptr_list[i]; addr_list[i]->from_mem_pool_ = true; } + return true; } std::vector MemoryManager::MallocContinuousMemFromMemPool(size_t total_size, std::vector size_list) { diff --git a/mindspore/ccsrc/device/memory_manager.h b/mindspore/ccsrc/device/memory_manager.h index dae0861506..be250e0f3f 100644 --- a/mindspore/ccsrc/device/memory_manager.h +++ b/mindspore/ccsrc/device/memory_manager.h @@ -46,11 +46,11 @@ class MemoryManager { uint8_t *MallocWorkSpaceMem(const AnfNodePtr &node, size_t index, int flag, size_t size); virtual uint8_t *MallocMem(int flag, size_t size); - virtual void MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); + virtual bool MallocMemFromMemPool(const DeviceAddressPtr address, size_t size); virtual void *MallocMemFromMemPool(size_t size); virtual void FreeMemFromMemPool(const DeviceAddressPtr address); virtual void FreeMemFromMemPool(void *device_ptr); - virtual void MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, + virtual bool MallocContinuousMemFromMemPool(const DeviceAddressPtrList addr_list, size_t total_size, std::vector size_list); virtual std::vector MallocContinuousMemFromMemPool(size_t total_size, std::vector size_list); diff --git a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc index b7280f52ae..a2dfce2241 100644 --- a/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/pre_activate/mem_reuse/mem_dynamic_allocator.cc @@ -38,9 +38,12 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size) { std::vector DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t total_size, std::vector size_list) { + std::vector device_addr_list; // Pre-alloc the one whole piece memory. auto device_addr = AllocTensorMem(total_size); - MS_EXCEPTION_IF_NULL(device_addr); + if (!device_addr) { + return device_addr_list; + } // Remove the pre-alloc memory. auto mem_block = FindMemBlock(device_addr); MS_EXCEPTION_IF_NULL(mem_block); @@ -54,7 +57,6 @@ std::vector DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t (void)mem_block->block_all_mem_buf_map_.erase(iter); // Split the pre-alloc memory into continuous memory by the size list. DynamicMemBufPtr continuous_mem_buf; - std::vector device_addr_list; auto buf_addr = device_addr; for (size_t i = 0; i < size_list.size(); i++) { continuous_mem_buf = std::make_shared(buf_addr, kMemBufUsed, size_list[i]); @@ -102,13 +104,16 @@ DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) { DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { size_t alloc_mem_size = CalMemBlockAllocSize(size); - + if (alloc_mem_size == 0) { + return nullptr; + } // Add new memory block DeviceMemPtr device_addr = nullptr; auto real_alloc_size = AllocDeviceMem(alloc_mem_size, &device_addr); if (real_alloc_size < size) { - MS_LOG(EXCEPTION) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" - << size << "]."; + MS_LOG(WARNING) << "Memory not enough: alloc size[" << real_alloc_size << "] is smaller than required size[" << size + << "]."; + return nullptr; } auto mem_block = std::make_shared(device_addr, real_alloc_size); MS_EXCEPTION_IF_NULL(mem_block); @@ -135,10 +140,10 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { size_t DynamicMemPoolBestFit::CalMemBlockAllocSize(size_t size) { auto device_free_mem_size = free_mem_size(); if (device_free_mem_size < size) { - MS_LOG(EXCEPTION) << "Memory not enough: current free memory size[" << device_free_mem_size - << "] is smaller than required size[" << size << "]."; + MS_LOG(WARNING) << "Memory not enough: current free memory size[" << device_free_mem_size + << "] is smaller than required size[" << size << "]."; + return 0; } - auto alloc_mem_size = mem_alloc_unit_size(); // Growing at twice of alloc size while (alloc_mem_size < size) { @@ -156,7 +161,6 @@ void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &me MS_EXCEPTION_IF_NULL(mem_buf); auto mem_block = FindMemBlock(mem_buf->device_addr_); MS_EXCEPTION_IF_NULL(mem_block); - // Divide new memory buf size_t newbuf_size = mem_buf->size_ - size; mem_buf->size_ = size;