!1401 change gpu kernel runtime to support memory swap

Merge pull request !1401 from zyli2020/add_mem_swap_module_header
pull/1401/MERGE
mindspore-ci-bot committed by Gitee
commit 7241f3a434

File diff suppressed because it is too large.

@@ -24,10 +24,12 @@
#include <unordered_map>
#include "device/kernel_runtime.h"
#include "device/kernel_runtime_manager.h"
#include "pre_activate/mem_reuse/mem_swap_manager.h"
namespace mindspore {
namespace device {
namespace gpu {
using mindspore::device::memswap::MemSwapManagerPtr;
class GPUKernelRuntime : public KernelRuntime {
public:
GPUKernelRuntime() = default;
@@ -51,10 +53,19 @@ class GPUKernelRuntime : public KernelRuntime {
// The related functions and members for using dynamic memory pool.
void InitKernelRefCount(const session::KernelGraph *graph);
void InitKernelOutputAddress(const session::KernelGraph *graph);
void ClearKernelOutputAddress(const session::KernelGraph *graph);
bool LaunchKernelDynamic(const session::KernelGraph *graph);
void AllocKernelDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
bool AddMemSwapTask(const AnfNodePtr &kernel);
bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size);
void *AttemptMallocMem(size_t size);
bool AllocKernelDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
AddressPtrList *kernel_inputs, AddressPtrList *kernel_workspaces,
AddressPtrList *kernel_outputs);
bool AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs);
bool AllocKernelOutputDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
AddressPtrList *kernel_outputs);
bool AllocKernelWorkspaceDynamicRes(const mindspore::kernel::KernelMod &kernel_mod,
const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_workspaces);
void AllocCommunicationOpDynamicRes(const session::KernelGraph *graph);
void AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel);
void AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel);
@@ -64,6 +75,8 @@ class GPUKernelRuntime : public KernelRuntime {
void FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel, const AddressPtrList &kernel_workspaces,
uint32_t graph_id);
std::unordered_map<uint32_t, MemReuseUtilPtr> mem_reuse_util_map_;
std::unordered_map<void *, MemSwapManagerPtr> mem_swap_map_;
MemSwapManagerPtr mem_swap_manager_{nullptr};
};
MS_REG_KERNEL_RUNTIME(kGPUDevice, GPUKernelRuntime);
} // namespace gpu
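
Note: the gpu_kernel_runtime.cc diff is suppressed above, so the interaction between the new AttemptMallocMem/AddMemSwapTask declarations and mem_swap_manager_ is not visible on this page. The toy program below only sketches the allocate-then-swap-out fallback those declarations suggest; FakePool, FakeSwapManager, and every other name in it are made up for illustration and are not MindSpore APIs.

// Illustrative sketch only -- not the suppressed gpu_kernel_runtime.cc implementation.
#include <cstddef>
#include <cstdio>
#include <deque>

// Stand-in for the GPU dynamic memory pool.
struct FakePool {
  size_t capacity = 8;
  size_t used = 0;
  bool Alloc(size_t size) {
    if (used + size > capacity) return false;
    used += size;
    return true;
  }
  void Free(size_t size) { used -= size; }
};

// Stand-in for MemSwapManager: tracks tensors that may be copied back to host memory.
struct FakeSwapManager {
  std::deque<size_t> swappable;
  bool SwapOutOne(FakePool *pool) {
    if (swappable.empty()) return false;
    pool->Free(swappable.front());  // a "device to host" copy releases device memory
    swappable.pop_front();
    return true;
  }
};

// Mirrors the control flow the AttemptMallocMem declarations imply: try the pool first,
// and on failure keep swapping idle tensors out until the request fits or nothing is left.
bool AttemptMalloc(FakePool *pool, FakeSwapManager *swap, size_t size) {
  while (!pool->Alloc(size)) {
    if (!swap->SwapOutOne(pool)) return false;
  }
  return true;
}

int main() {
  FakePool pool;
  pool.used = 7;             // pool nearly exhausted
  FakeSwapManager swap;
  swap.swappable = {3, 3};   // two idle tensors that could move to host memory
  std::printf("alloc 4 -> %s\n", AttemptMalloc(&pool, &swap, 4) ? "ok" : "fail");
  return 0;
}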

@@ -25,10 +25,7 @@ namespace memswap {
void MemSwapManager::Init(const mindspore::session::KernelGraph *kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
execution_order_ = kernel_graph->execution_order();
FuncGraphManagerPtr manager = kernel_graph->manager();
NodeUsersMap user_map = manager->node_users();
size_t kernel_index = 0;
for (const auto &kernel : execution_order_) {
// parse topo order of kernel
kernel_execution_info_.emplace(kernel.get(), kernel_index++);
@@ -44,6 +41,31 @@ void MemSwapManager::Init(const mindspore::session::KernelGraph *kernel_graph) {
}
// parse topo order of user kernel
SaveUserKernelTopoOrder(kernel_graph);
sort(ordered_tensors_.begin(), ordered_tensors_.end(),
[](const TensorInfo &a, const TensorInfo &b) { return a.tensor_size_ > b.tensor_size_; });
auto cur_tensor_size = ordered_tensors_.front().tensor_size_;
for (auto &tensor_info : ordered_tensors_) {
if (cur_tensor_size != tensor_info.tensor_size_) {
cur_tensor_size = tensor_info.tensor_size_;
tensor_size_num_++;
}
}
tensor_size_threshold_ = ordered_tensors_.front().tensor_size_;
tensor_size_threshold_idx_ = 0;
distance_threshold_ = kernel_index / kDistanceInitFactor;
mem_swap_initialized_ = true;
MS_EXCEPTION_IF_NULL(mem_copy_manager_);
mem_copy_manager_->Init();
}
void MemSwapManager::SaveUserKernelTopoOrder(const mindspore::session::KernelGraph *kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
FuncGraphManagerPtr manager = kernel_graph->manager();
NodeUsersMap user_map = manager->node_users();
for (const auto &kernel : execution_order_) {
auto iter = user_map.find(kernel);
if (iter == user_map.end()) {
@@ -66,24 +88,6 @@ void MemSwapManager::Init(const mindspore::session::KernelGraph *kernel_graph) {
sort(node_user_pair.second.begin(), node_user_pair.second.end());
}
}
sort(ordered_tensors_.begin(), ordered_tensors_.end(),
[](const TensorInfo &a, const TensorInfo &b) { return a.tensor_size_ > b.tensor_size_; });
auto cur_tensor_size = ordered_tensors_.front().tensor_size_;
for (auto &tensor_info : ordered_tensors_) {
if (cur_tensor_size != tensor_info.tensor_size_) {
cur_tensor_size = tensor_info.tensor_size_;
tensor_size_num_++;
}
}
tensor_size_threshold_ = ordered_tensors_.front().tensor_size_;
tensor_size_threshold_idx_ = 0;
distance_threshold_ = kernel_index / kDistanceInitFactor;
mem_swap_initialized_ = true;
MS_EXCEPTION_IF_NULL(mem_copy_manager_);
mem_copy_manager_->Init();
}
void MemSwapManager::AddSwapInfo() {
@@ -228,12 +232,12 @@ float MemSwapManager::QueryKernelExecutionPerform(const AnfNodePtr &kernel) const {
return kernel_exec_info.execution_perform_;
}
bool MemSwapManager::QueryKerneTriggerSwap(const AnfNodePtr &kernel) const {
bool MemSwapManager::QueryKernelTriggerSwap(const AnfNodePtr &kernel) const {
const auto &kernel_exec_info = SearchKernelExecutionInfo(kernel);
return kernel_exec_info.trigger_swap_;
}
bool MemSwapManager::QueryKerneNeedSwap(const AnfNodePtr &kernel) const {
bool MemSwapManager::QueryKernelNeedSwap(const AnfNodePtr &kernel) const {
const auto &kernel_exec_info = SearchKernelExecutionInfo(kernel);
return kernel_exec_info.need_swap_;
}
@@ -254,7 +258,7 @@ const PerformPair &MemSwapManager::QueryKernelSwapPerform(const AnfNodePtr &kernel, size_t output_idx) const {
return iter_output->second;
}
const std::vector<MemSwapInfo> &MemSwapManager::QueryKerneMemSwapInfo(const AnfNodePtr &kernel) const {
const std::vector<MemSwapInfo> &MemSwapManager::QueryKernelMemSwapInfo(const AnfNodePtr &kernel) const {
MS_EXCEPTION_IF_NULL(kernel);
auto iter = mem_swap_info_.find(kernel.get());
if (iter == mem_swap_info_.end()) {
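
To make the threshold initialization in Init above concrete, the following is a small standalone rendering of the same sorting and distinct-size counting on sample data. TensorInfo here is a reduced stand-in with only the field the loop touches, and the counter's starting value is an assumption, since the member initializer for tensor_size_num_ is outside this diff.

// Standalone demo of the sort + distinct-size count + threshold logic shown in Init.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Reduced stand-in for memswap::TensorInfo: only the field the Init loop reads.
struct TensorInfo {
  size_t tensor_size_;
};

int main() {
  std::vector<TensorInfo> ordered_tensors{{1 << 20}, {4 << 20}, {2 << 20}, {4 << 20}};

  // Same ordering as Init: largest tensors first, so the front element becomes the
  // initial swap-size threshold and candidates are considered biggest-first.
  std::sort(ordered_tensors.begin(), ordered_tensors.end(),
            [](const TensorInfo &a, const TensorInfo &b) { return a.tensor_size_ > b.tensor_size_; });

  // Count how many distinct sizes exist, as the loop in Init does; the starting value
  // (assumed 1 here) comes from the member initializer, which is not part of this diff.
  size_t tensor_size_num = 1;
  size_t cur_tensor_size = ordered_tensors.front().tensor_size_;
  for (const auto &tensor_info : ordered_tensors) {
    if (cur_tensor_size != tensor_info.tensor_size_) {
      cur_tensor_size = tensor_info.tensor_size_;
      ++tensor_size_num;
    }
  }
  size_t tensor_size_threshold = ordered_tensors.front().tensor_size_;

  std::printf("distinct sizes: %zu, initial threshold: %zu bytes\n",
              tensor_size_num, tensor_size_threshold);
  return 0;
}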

@@ -63,11 +63,11 @@ class MemSwapManager {
const PerformPair &QueryKernelSwapPerform(const AnfNodePtr &kernel, size_t output_idx) const;
bool QueryKerneTriggerSwap(const AnfNodePtr &kernel) const;
bool QueryKernelTriggerSwap(const AnfNodePtr &kernel) const;
bool QueryKerneNeedSwap(const AnfNodePtr &kernel) const;
bool QueryKernelNeedSwap(const AnfNodePtr &kernel) const;
const std::vector<MemSwapInfo> &QueryKerneMemSwapInfo(const AnfNodePtr &kernel) const;
const std::vector<MemSwapInfo> &QueryKernelMemSwapInfo(const AnfNodePtr &kernel) const;
void InsertSwapInBlackList(const void *device_ptr);
@@ -90,6 +90,8 @@ class MemSwapManager {
void ResetSwapInfo();
void SaveUserKernelTopoOrder(const mindspore::session::KernelGraph *kernel_graph);
void AddKernelTriggerSwap(const AnfNodePtr &kernel, bool trigger_swap);
void AddKernelNeedSwap(const AnfNodePtr &kernel, bool need_swap);
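
The hunk above only adds the declarations for the two setters. A plausible body, inferred from the QueryKernelTriggerSwap/QueryKernelNeedSwap getters and the kernel_execution_info_ map shown earlier in this diff, could look like the sketch below; it is illustrative only and may differ from the merged implementation.

// Illustrative guess at the two new setters; based only on the matching getters and the
// kernel_execution_info_ map visible elsewhere in this diff, not on the merged source.
void MemSwapManager::AddKernelTriggerSwap(const AnfNodePtr &kernel, bool trigger_swap) {
  MS_EXCEPTION_IF_NULL(kernel);
  kernel_execution_info_[kernel.get()].trigger_swap_ = trigger_swap;
}

void MemSwapManager::AddKernelNeedSwap(const AnfNodePtr &kernel, bool need_swap) {
  MS_EXCEPTION_IF_NULL(kernel);
  kernel_execution_info_[kernel.get()].need_swap_ = need_swap;
}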
