|
|
@ -20,6 +20,7 @@
|
|
|
|
#include <string>
|
|
|
|
#include <string>
|
|
|
|
#include <memory>
|
|
|
|
#include <memory>
|
|
|
|
#include <vector>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include <set>
|
|
|
|
#include <utility>
|
|
|
|
#include <utility>
|
|
|
|
#include <unordered_map>
|
|
|
|
#include <unordered_map>
|
|
|
|
#include "runtime/device/kernel_runtime.h"
|
|
|
|
#include "runtime/device/kernel_runtime.h"
|
|
|
@ -55,23 +56,27 @@ class GPUKernelRuntime : public KernelRuntime {
|
|
|
|
void InitKernelOutputAddress(const session::KernelGraph *graph);
|
|
|
|
void InitKernelOutputAddress(const session::KernelGraph *graph);
|
|
|
|
void InitKernelWorkspaceAddress(const session::KernelGraph *graph);
|
|
|
|
void InitKernelWorkspaceAddress(const session::KernelGraph *graph);
|
|
|
|
void InitMemorySwapInfo(const session::KernelGraph *graph);
|
|
|
|
void InitMemorySwapInfo(const session::KernelGraph *graph);
|
|
|
|
|
|
|
|
void SaveGraphOutputNode(const session::KernelGraph *graph);
|
|
|
|
|
|
|
|
bool IsGraphOutput(const session::KernelGraph *graph, const mindspore::AnfNodePtr &kernel) const;
|
|
|
|
void ClearKernelOutputAddress(const session::KernelGraph *graph);
|
|
|
|
void ClearKernelOutputAddress(const session::KernelGraph *graph);
|
|
|
|
void ClearKernelWorkspaceAddress(const session::KernelGraph *graph);
|
|
|
|
void ClearKernelWorkspaceAddress(const session::KernelGraph *graph);
|
|
|
|
void ClearKernelOldOutputAndWorkspace(const session::KernelGraph *graph);
|
|
|
|
void ClearKernelOldOutputAndWorkspace(const session::KernelGraph *graph);
|
|
|
|
|
|
|
|
bool RunOneStep(const session::KernelGraph *graph);
|
|
|
|
bool SearchMemSwapScheme(const session::KernelGraph *graph);
|
|
|
|
bool SearchMemSwapScheme(const session::KernelGraph *graph);
|
|
|
|
bool RefineMemSwapScheme(const session::KernelGraph *graph);
|
|
|
|
bool RefineMemSwapScheme(const session::KernelGraph *graph);
|
|
|
|
bool LaunchKernelDynamic(const session::KernelGraph *graph, bool mock = false, bool profiling = false);
|
|
|
|
bool LaunchKernelDynamic(const session::KernelGraph *graph, bool mock = false, bool profiling = false);
|
|
|
|
void LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs,
|
|
|
|
void LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs,
|
|
|
|
const AddressPtrList &workspace, const AddressPtrList &outputs);
|
|
|
|
const AddressPtrList &workspace, const AddressPtrList &outputs);
|
|
|
|
bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size);
|
|
|
|
bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size, bool mock);
|
|
|
|
bool AllocKernelDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
|
|
|
|
bool AllocKernelDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
|
|
|
|
AddressPtrList *kernel_inputs, AddressPtrList *kernel_workspaces,
|
|
|
|
AddressPtrList *kernel_inputs, AddressPtrList *kernel_workspaces,
|
|
|
|
AddressPtrList *kernel_outputs);
|
|
|
|
AddressPtrList *kernel_outputs, bool mock);
|
|
|
|
bool AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs);
|
|
|
|
bool AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_inputs, bool mock);
|
|
|
|
bool AllocKernelOutputDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
|
|
|
|
bool AllocKernelOutputDynamicRes(const mindspore::kernel::KernelMod &kernel_mod, const mindspore::AnfNodePtr &kernel,
|
|
|
|
AddressPtrList *kernel_outputs);
|
|
|
|
AddressPtrList *kernel_outputs, bool mock);
|
|
|
|
bool AllocKernelWorkspaceDynamicRes(const mindspore::kernel::KernelMod &kernel_mod,
|
|
|
|
bool AllocKernelWorkspaceDynamicRes(const mindspore::kernel::KernelMod &kernel_mod,
|
|
|
|
const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_workspaces);
|
|
|
|
const mindspore::AnfNodePtr &kernel, AddressPtrList *kernel_workspaces,
|
|
|
|
|
|
|
|
bool mock);
|
|
|
|
void AllocCommunicationOpDynamicRes(const session::KernelGraph *graph);
|
|
|
|
void AllocCommunicationOpDynamicRes(const session::KernelGraph *graph);
|
|
|
|
void AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel);
|
|
|
|
void AllocCommunicationOpInputDynamicRes(const mindspore::AnfNodePtr &kernel);
|
|
|
|
void AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel);
|
|
|
|
void AllocCommunicationOpOutputDynamicRes(const mindspore::AnfNodePtr &kernel);
|
|
|
@ -79,15 +84,16 @@ class GPUKernelRuntime : public KernelRuntime {
|
|
|
|
const DeviceAddressPtrList addr_list, size_t total_size,
|
|
|
|
const DeviceAddressPtrList addr_list, size_t total_size,
|
|
|
|
std::vector<size_t> size_list);
|
|
|
|
std::vector<size_t> size_list);
|
|
|
|
void FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel);
|
|
|
|
void FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel);
|
|
|
|
bool AddMemorySwapTask(const AnfNodePtr &kernel);
|
|
|
|
bool UpdateMemorySwapTask(const AnfNodePtr &kernel, bool mock, bool profiling);
|
|
|
|
bool UpdateMemorySwapInfo(const session::KernelGraph *graph);
|
|
|
|
bool AddMemorySwapTask(const AnfNodePtr &kernel, bool mock, bool profiling);
|
|
|
|
bool UpdateMemorySwapTask(const AnfNodePtr &kernel);
|
|
|
|
void UpdateHostSwapInQueue(const DeviceAddressPtr device_address, bool mock);
|
|
|
|
void UpdateHostSwapQueue(const DeviceAddressPtr device_address);
|
|
|
|
void UpdateHostSwapOutQueue(bool mock);
|
|
|
|
void UpdateDeviceSwapQueue();
|
|
|
|
void ClearSwapInfo(bool mock);
|
|
|
|
void ClearSwapQueue();
|
|
|
|
|
|
|
|
std::unordered_map<uint32_t, MemReuseUtilPtr> mem_reuse_util_map_;
|
|
|
|
std::unordered_map<uint32_t, MemReuseUtilPtr> mem_reuse_util_map_;
|
|
|
|
std::unordered_map<uint32_t, MemSwapManagerPtr> mem_swap_map_;
|
|
|
|
std::unordered_map<uint32_t, MemSwapManagerPtr> mem_swap_map_;
|
|
|
|
std::unordered_map<uint32_t, bool> is_first_step_map_;
|
|
|
|
std::unordered_map<uint32_t, bool> is_first_step_map_;
|
|
|
|
|
|
|
|
std::unordered_map<uint32_t, std::set<AnfNodePtr>> graph_output_map_;
|
|
|
|
|
|
|
|
|
|
|
|
MemReuseUtilPtr mem_reuse_util_{nullptr};
|
|
|
|
MemReuseUtilPtr mem_reuse_util_{nullptr};
|
|
|
|
MemSwapManagerPtr mem_swap_manager_{nullptr};
|
|
|
|
MemSwapManagerPtr mem_swap_manager_{nullptr};
|
|
|
|
};
|
|
|
|
};
|
|
|
|