From 95565aa7b826d68f8ddc7f0c2f6fd1f0029fe10b Mon Sep 17 00:00:00 2001 From: lizhenyu Date: Fri, 5 Mar 2021 10:20:40 +0800 Subject: [PATCH] add hardware abstract layer --- mindspore/ccsrc/CMakeLists.txt | 1 + .../ccsrc/backend/session/gpu_session.cc | 3 +- .../ccsrc/runtime/device/device_address.h | 2 + .../runtime/device/gpu/gpu_kernel_build.cc | 4 +- .../runtime/device/gpu/gpu_kernel_build.h | 9 +- .../runtime/device/gpu/gpu_launch_kernel.cc | 3 +- .../ccsrc/runtime/hardware/CMakeLists.txt | 17 ++ .../hardware/cpu/cpu_device_context.cc | 78 +++++++++ .../runtime/hardware/cpu/cpu_device_context.h | 55 +++++++ .../ccsrc/runtime/hardware/device_context.h | 94 +++++++++++ .../hardware/device_context_manager.cc | 59 +++++++ .../runtime/hardware/device_context_manager.h | 69 ++++++++ .../hardware/gpu/gpu_device_context.cc | 154 ++++++++++++++++++ .../runtime/hardware/gpu/gpu_device_context.h | 64 ++++++++ 14 files changed, 603 insertions(+), 9 deletions(-) create mode 100644 mindspore/ccsrc/runtime/hardware/CMakeLists.txt create mode 100644 mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc create mode 100644 mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h create mode 100644 mindspore/ccsrc/runtime/hardware/device_context.h create mode 100644 mindspore/ccsrc/runtime/hardware/device_context_manager.cc create mode 100644 mindspore/ccsrc/runtime/hardware/device_context_manager.h create mode 100644 mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc create mode 100644 mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 96c595a8a3..d0b8195f3c 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -208,6 +208,7 @@ set(SUB_COMP backend/kernel_compiler backend/session runtime/device + runtime/hardware runtime/hccl_adapter frontend/optimizer frontend/parallel diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index f8b9fba49a..5d9d40dedb 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -196,7 +196,8 @@ void GPUSession::AssignStream(const std::shared_ptr &kernel_graph) } void GPUSession::BuildKernel(const std::shared_ptr &kernel_graph) const { - device::gpu::GpuBuild(kernel_graph); + auto kernels = kernel_graph->execution_order(); + device::gpu::CreateGPUKernel(kernels); } void GPUSession::AllocateMemory(KernelGraph *kernel_graph) const { diff --git a/mindspore/ccsrc/runtime/device/device_address.h b/mindspore/ccsrc/runtime/device/device_address.h index a3cc13f0c7..ebaecb68ab 100644 --- a/mindspore/ccsrc/runtime/device/device_address.h +++ b/mindspore/ccsrc/runtime/device/device_address.h @@ -31,6 +31,7 @@ namespace cpu { class CPUSimpleMemPlan; class CPUMemoryManager; class CPUKernelRuntime; +class CPUDeviceContext; } // namespace cpu namespace ascend { class AscendKernelRuntime; @@ -96,6 +97,7 @@ class DeviceAddress : public mindspore::DeviceSync { friend class mindspore::device::cpu::CPUSimpleMemPlan; friend class mindspore::device::cpu::CPUMemoryManager; friend class mindspore::device::cpu::CPUKernelRuntime; + friend class mindspore::device::cpu::CPUDeviceContext; friend class mindspore::device::gpu::GPUKernelRuntime; friend class mindspore::device::gpu::GPUMemoryManager; friend class mindspore::device::ascend::AscendKernelRuntime; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc index f63d1fc287..f4306e3ebe 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc @@ -27,12 +27,10 @@ namespace mindspore { namespace device { namespace gpu { -void GpuBuild(const KernelGraphPtr &kernel_graph) { +void CreateGPUKernel(const std::vector &kernels) { kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); MS_EXCEPTION_IF_NULL(bin_map); - MS_EXCEPTION_IF_NULL(kernel_graph); bool already_check_nvcc = false; - auto kernels = kernel_graph->execution_order(); std::vector akg_nodes; for (const auto &kernel : kernels) { std::string kernel_name = session::AnfRuntimeAlgorithm::GetCNodeName(kernel); diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h index e4b207c1c7..182ef0fd5e 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.h @@ -13,16 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPUKERNELBUILD_H_ -#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPUKERNELBUILD_H_ +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_BUILD_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_BUILD_H_ +#include #include #include "backend/session/kernel_graph.h" namespace mindspore { namespace device { namespace gpu { -void GpuBuild(const std::shared_ptr &kernel_graph); +void CreateGPUKernel(const std::vector &kernels); } // namespace gpu } // namespace device } // namespace mindspore -#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPUKERNELBUILD_H_ +#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_KERNEL_BUILD_H_ diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.cc index 8055b8ab6f..82ffcab6a4 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_launch_kernel.cc @@ -53,6 +53,7 @@ void GPULaunchkernel::KernelSelect(std::shared_ptr kernel_ } void GPULaunchkernel::KernelBuild(std::shared_ptr kernel_graph) { - device::gpu::GpuBuild(kernel_graph); + auto kernels = kernel_graph->execution_order(); + device::gpu::CreateGPUKernel(kernels); } } // namespace mindspore::device::gpu diff --git a/mindspore/ccsrc/runtime/hardware/CMakeLists.txt b/mindspore/ccsrc/runtime/hardware/CMakeLists.txt new file mode 100644 index 0000000000..99eb343145 --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/CMakeLists.txt @@ -0,0 +1,17 @@ +file(GLOB_RECURSE HARDWARE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "device_context_manager.cc") + +if(ENABLE_GPU) + file(GLOB_RECURSE HARDWARE_GPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "gpu/gpu_device_context.cc") +endif() + +if(ENABLE_CPU) + file(GLOB_RECURSE HARDWARE_CPU_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + "cpu/cpu_device_context.cc") +endif() + +set_property(SOURCE ${HARDWARE_SRC_LIST} ${HARDWARE_D_SRC_LIST} ${HARDWARE_GPU_SRC_LIST} ${HARDWARE_CPU_SRC_LIST} + PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) +add_library(_mindspore_runtime_hardware_obj OBJECT ${HARDWARE_SRC_LIST} ${HARDWARE_D_SRC_LIST} + ${HARDWARE_GPU_SRC_LIST} ${HARDWARE_CPU_SRC_LIST}) diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc new file mode 100644 index 0000000000..1c87120e14 --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc @@ -0,0 +1,78 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/hardware/cpu/cpu_device_context.h" +#include +#include "runtime/device/cpu/cpu_device_address.h" +#include "runtime/device/cpu/cpu_memory_manager.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "runtime/device/cpu/kernel_select_cpu.h" +#include "utils/trace_base.h" + +namespace mindspore { +namespace device { +namespace cpu { +bool CPUDeviceContext::Initialize() { + if (initialized_) { + return true; + } + mem_manager_ = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_manager_); + initialized_ = true; + return true; +} + +bool CPUDeviceContext::AllocateMemory(const DeviceAddressPtr &address, size_t size) const { + address->ptr_ = static_cast(mem_manager_.get())->StaticMemMalloc(size); + return true; +} + +void CPUDeviceContext::FreeMemory(const DeviceAddressPtr &address) const { + static_cast(mem_manager_.get())->MemFree(address->ptr_); + address->ptr_ = nullptr; +} + +void CPUDeviceContext::SetOperatorInfo(const std::vector &nodes) const { + for (const auto &node : nodes) { + SetKernelInfo(node); + } +} + +void CPUDeviceContext::CreateKernel(const std::vector &nodes) const { + for (const auto &node : nodes) { + MS_EXCEPTION_IF_NULL(node); + std::string kernel_name = AnfAlgo::GetCNodeName(node); + std::shared_ptr cpu_kernel = kernel::CPUKernelFactory::GetInstance().Create(kernel_name, node); + if (!cpu_kernel) { + MS_LOG(EXCEPTION) << "Build cpu operator[" << node->fullname_with_scope() << "] failed"; + } + + cpu_kernel->Init(node); + AnfAlgo::SetKernelMod(cpu_kernel, node.get()); + } +} + +bool CPUDeviceContext::LaunchKernel(KernelMod *kernel_mod, const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs) const { + MS_EXCEPTION_IF_NULL(kernel_mod); + return kernel_mod->Launch(inputs, workspace, outputs, nullptr); +} + +MS_REGISTER_DEVICE(kCPUDevice, CPUDeviceContext); +} // namespace cpu +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h new file mode 100644 index 0000000000..835705272e --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h @@ -0,0 +1,55 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_RUNTIME_HARDWARE_CPU_CPU_DEVICE_CONTEXT_H_ +#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_CPU_CPU_DEVICE_CONTEXT_H_ + +#include +#include +#include "runtime/hardware/device_context.h" +#include "runtime/hardware/device_context_manager.h" +#include "runtime/device/memory_manager.h" + +namespace mindspore { +namespace device { +namespace cpu { +class CPUDeviceContext : public DeviceContext { + public: + explicit CPUDeviceContext(const DeviceContextKey &device_context_key) + : DeviceContext(device_context_key), mem_manager_(nullptr), initialized_(false) {} + ~CPUDeviceContext() override = default; + + bool Initialize() override; + + bool AllocateMemory(const DeviceAddressPtr &address, size_t size) const override; + void FreeMemory(const DeviceAddressPtr &address) const override; + + void SetOperatorInfo(const std::vector &nodes) const override; + void CreateKernel(const std::vector &nodes) const override; + + bool LaunchKernel(KernelMod *kernel_mod, const std::vector &inputs, + const std::vector &workspace, const std::vector &outputs) const override; + + private: + DISABLE_COPY_AND_ASSIGN(CPUDeviceContext); + + std::shared_ptr mem_manager_; + bool initialized_; +}; +} // namespace cpu +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_HARDWARE_CPU_CPU_DEVICE_CONTEXT_H_ diff --git a/mindspore/ccsrc/runtime/hardware/device_context.h b/mindspore/ccsrc/runtime/hardware/device_context.h new file mode 100644 index 0000000000..893677f9db --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/device_context.h @@ -0,0 +1,94 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_H_ +#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_H_ + +#include +#include +#include +#include "runtime/device/device_address.h" +#include "backend/session/kernel_graph.h" +#include "backend/session/anf_runtime_algorithm.h" + +namespace mindspore { +namespace device { +using mindspore::kernel::AddressPtr; +using mindspore::kernel::KernelMod; + +struct DeviceContextKey { + // device type name, such as 'GPU' 'Ascend' 'CPU'. + std::string device_name_; + uint32_t device_id_{0}; + + // Use the result of ToString() as key to look up DeviceContext + // in cache map which maintains created DeviceContext objects. + std::string ToString() const { return device_name_ + "_" + std::to_string(device_id_); } +}; + +// DeviceContext is unified interface of interaction with device. +class DeviceContext { + public: + explicit DeviceContext(const DeviceContextKey &device_context_key) : device_context_key_(device_context_key) {} + virtual ~DeviceContext() = default; + + // Initialize the device context and return success or not. + virtual bool Initialize() = 0; + + // Destroy device context and release device resource. + virtual void Destroy() {} + + // Relevant function to allocate and free device memory. + virtual bool AllocateMemory(const DeviceAddressPtr &address, size_t size) const = 0; + virtual void FreeMemory(const DeviceAddressPtr &address) const = 0; + + // Allocate continuous device memory end to end into 'addr_list'. + // Communication operators may need continuous memory for input and output + // to optimize the communication performance. + virtual bool AllocateContinuousMemory(const DeviceAddressPtrList &addr_list, size_t total_size, + const std::vector &size_list) const { + return true; + } + + // Optimize the kernel graph according to different devices. + virtual void OptimizeGraph(const KernelGraphPtr &graph) const {} + + // Select the matching backend kernels according to the data type and format of input and output for all + // execution operators, and set final device data type and format information for backend kernels, device + // data type and format which replace original data type and format will use for executing kernels. + virtual void SetOperatorInfo(const std::vector &nodes) const {} + + // Generate 'KernelMod' for all kernels and set 'KernelMod' into kernel, + // 'KernelMod' is real executive object of kernel. + virtual void CreateKernel(const std::vector &nodes) const {} + + // Launch a kernel via 'KernelMod' of the kernel. + virtual bool LaunchKernel(KernelMod *kernel_mod, const std::vector &inputs, + const std::vector &workspace, const std::vector &outputs) const = 0; + + // Synchronize stream, device such as GPU and Ascend need stream to launch kernel asynchronously, + // using 'SyncStream' to block thread and wait for completing all tasks in stream. + // Devices that do not need stream could ignore the implementation of this function. + virtual bool SyncStream(size_t stream_id = 0) { return true; } + + protected: + DeviceContextKey device_context_key_; +}; +using DeviceContextPtr = std::shared_ptr; +} // namespace device +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_H_ diff --git a/mindspore/ccsrc/runtime/hardware/device_context_manager.cc b/mindspore/ccsrc/runtime/hardware/device_context_manager.cc new file mode 100644 index 0000000000..f29a9f4921 --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/device_context_manager.cc @@ -0,0 +1,59 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/hardware/device_context_manager.h" + +namespace mindspore { +namespace device { +void DeviceContextManager::Register(const std::string &device_name, DeviceContextCreator &&device_context_creator) { + if (device_context_creators_.find(device_name) == device_context_creators_.end()) { + (void)device_context_creators_.emplace(device_name, device_context_creator); + } +} + +void DeviceContextManager::ClearDeviceContexts() { + std::lock_guard guard(lock_); + for (auto &iter : device_contexts_) { + MS_LOG(INFO) << "Release device " << iter.first; + MS_EXCEPTION_IF_NULL(iter.second); + iter.second->Destroy(); + } + device_contexts_.clear(); +} + +DeviceContext *DeviceContextManager::GetDeviceContext(const DeviceContextKey &device_context_key) { + std::string device_context_key_str = device_context_key.ToString(); + std::lock_guard guard(lock_); + + auto device_context_iter = device_contexts_.find(device_context_key_str); + if (device_context_iter != device_contexts_.end()) { + return device_context_iter->second.get(); + } + + std::shared_ptr device_context; + auto creator_iter = device_context_creators_.find(device_context_key.device_name_); + if (creator_iter != device_context_creators_.end()) { + device_context = (creator_iter->second)(device_context_key); + MS_EXCEPTION_IF_NULL(device_context); + device_contexts_[device_context_key_str] = device_context; + } else { + MS_LOG(EXCEPTION) << "There is no device context creator for " << device_context_key.device_name_ + << " with device id " << device_context_key.device_id_; + } + return device_context.get(); +} +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/hardware/device_context_manager.h b/mindspore/ccsrc/runtime/hardware/device_context_manager.h new file mode 100644 index 0000000000..576bce0912 --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/device_context_manager.h @@ -0,0 +1,69 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_MANAGER_H_ +#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_MANAGER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "runtime/hardware/device_context.h" +namespace mindspore { +namespace device { +using DeviceContextCreator = std::function(const DeviceContextKey &)>; + +class DeviceContextManager { + public: + static DeviceContextManager &GetInstance() { + static DeviceContextManager instance; + return instance; + } + void Register(const std::string &device_name, DeviceContextCreator &&device_context_creator); + DeviceContext *GetDeviceContext(const DeviceContextKey &device_info); + void ClearDeviceContexts(); + + private: + DeviceContextManager() = default; + ~DeviceContextManager() = default; + DISABLE_COPY_AND_ASSIGN(DeviceContextManager); + + // The string converted from DeviceContextKey -> DeviceContextPtr. + std::map device_contexts_; + // The name of device -> DeviceContextCreator. + std::map device_context_creators_; + std::mutex lock_; +}; + +class DeviceContextRegister { + public: + DeviceContextRegister(const std::string &device_name, DeviceContextCreator &&runtime_creator) { + DeviceContextManager::GetInstance().Register(device_name, std::move(runtime_creator)); + } + ~DeviceContextRegister() = default; +}; + +#define MS_REGISTER_DEVICE(DEVICE_NAME, DEVICE_CONTEXT_CLASS) \ + static const DeviceContextRegister g_device_##DEVICE_NAME##_reg( \ + DEVICE_NAME, [](const DeviceContextKey &device_context_key) { \ + return std::make_shared(device_context_key); \ + }); +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_CCSRC_RUNTIME_HARDWARE_DEVICE_CONTEXT_MANAGER_H_ diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc new file mode 100644 index 0000000000..6f8f3dbf03 --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc @@ -0,0 +1,154 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"){} + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/hardware/gpu/gpu_device_context.h" +#include +#include "runtime/device/gpu/kernel_info_setter.h" +#include "runtime/device/gpu/gpu_kernel_build.h" +#include "runtime/device/gpu/gpu_device_address.h" +#include "runtime/device/gpu/gpu_memory_manager.h" +#include "runtime/device/gpu/gpu_memory_allocator.h" +#include "runtime/device/gpu/gpu_stream_assign.h" +#include "runtime/device/gpu/distribution/collective_init.h" +#include "runtime/device/gpu/gpu_device_manager.h" +#include "runtime/device/gpu/gpu_buffer_mgr.h" +#include "backend/kernel_compiler/common_utils.h" +#include "runtime/device/gpu/gpu_common.h" + +namespace mindspore { +namespace device { +namespace gpu { +bool GPUDeviceContext::Initialize() { + if (initialized_ == true) { + GPUMemoryAllocator::GetInstance().CheckMaxDeviceMemory(); + return true; + } + + // Set device id and initialize device resource. + bool ret = InitDevice(); + if (!ret) { + MS_LOG(ERROR) << "GPU InitDevice failed."; + return ret; + } + + // Initialize memory pool. + mem_manager_ = std::make_shared(); + MS_EXCEPTION_IF_NULL(mem_manager_); + mem_manager_->MallocDeviceMemory(); + + // Initialize NCCL. + const void *collective_handle_ = CollectiveInitializer::instance().collective_handle(); + bool collective_inited = CollectiveInitializer::instance().collective_inited(); + if (collective_inited && collective_handle_ != nullptr) { + auto init_nccl_comm_funcptr = + reinterpret_cast(dlsym(const_cast(collective_handle_), "InitNCCLComm")); + MS_EXCEPTION_IF_NULL(init_nccl_comm_funcptr); + (*init_nccl_comm_funcptr)(); + } + + initialized_ = true; + return ret; +} + +bool GPUDeviceContext::InitDevice() { + if (GPUDeviceManager::GetInstance().device_count() <= 0) { + MS_LOG(ERROR) << "No GPU device found."; + return false; + } + + if (!GPUDeviceManager::GetInstance().is_device_id_init()) { + if (!GPUDeviceManager::GetInstance().set_cur_device_id(device_context_key_.device_id_)) { + MS_LOG(ERROR) << "Failed to set current device id: " << SizeToInt(device_context_key_.device_id_); + return false; + } + } + + // Initialize device resource, such as stream, cudnn and cublas handle. + GPUDeviceManager::GetInstance().InitDevice(); + auto stream = GPUDeviceManager::GetInstance().default_stream(); + if (stream == nullptr) { + MS_LOG(ERROR) << "No default CUDA stream found."; + return false; + } + streams_.push_back(stream); + return true; +} + +void GPUDeviceContext::Destroy() { + // Release GPU buffer manager resource + if (GpuBufferMgr::GetInstance().IsInit()) { + if (!GpuBufferMgr::GetInstance().IsClosed() && !GpuBufferMgr::GetInstance().CloseNotify()) { + MS_LOG(EXCEPTION) << "Could not close gpu data queue."; + } + CHECK_OP_RET_WITH_EXCEPT(GpuBufferMgr::GetInstance().Destroy(), "Could not destroy gpu data queue."); + } + + // Release stream, cudnn and cublas handle, etc. + GPUDeviceManager::GetInstance().ReleaseDevice(); + + // Release device memory + if (mem_manager_ != nullptr) { + mem_manager_->FreeDeviceMemory(); + mem_manager_ = nullptr; + } + + // Clean GPU cache kernels which is generated by AKG + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!(context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG))) { + kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); + MS_EXCEPTION_IF_NULL(bin_map); + bin_map->RemoveKernelCache(); + } +} + +bool GPUDeviceContext::AllocateMemory(const DeviceAddressPtr &address, size_t size) const { + return mem_manager_->MallocMemFromMemPool(address, size); +} + +void GPUDeviceContext::FreeMemory(const DeviceAddressPtr &address) const { mem_manager_->FreeMemFromMemPool(address); } + +bool GPUDeviceContext::AllocateContinuousMemory(const DeviceAddressPtrList &addr_list, size_t total_size, + const std::vector &size_list) const { + return mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); +} + +void GPUDeviceContext::SetOperatorInfo(const std::vector &nodes) const { + for (const auto &node : nodes) { + SetKernelInfo(node); + } +} + +void GPUDeviceContext::CreateKernel(const std::vector &nodes) const { CreateGPUKernel(nodes); } + +bool GPUDeviceContext::LaunchKernel(KernelMod *kernel_mod, const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs) const { + MS_EXCEPTION_IF_NULL(kernel_mod); + return kernel_mod->Launch(inputs, workspace, outputs, streams_.front()); +} + +bool GPUDeviceContext::SyncStream(size_t stream_id) { + if (stream_id >= streams_.size()) { + MS_LOG(EXCEPTION) << "The stream_id: " << stream_id << " is greater than stream array size: " << streams_.size(); + } + return GPUDeviceManager::GetInstance().SyncStream(streams_[stream_id]); +} + +MS_REGISTER_DEVICE(kGPUDevice, GPUDeviceContext); +} // namespace gpu +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h new file mode 100644 index 0000000000..4b61a97462 --- /dev/null +++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h @@ -0,0 +1,64 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_HARDWARE_GPU_GPU_DEVICE_CONTEXT_H_ +#define MINDSPORE_CCSRC_RUNTIME_HARDWARE_GPU_GPU_DEVICE_CONTEXT_H_ + +#include +#include +#include "runtime/hardware/device_context.h" +#include "runtime/hardware/device_context_manager.h" +#include "runtime/device/memory_manager.h" + +namespace mindspore { +namespace device { +namespace gpu { +class GPUDeviceContext : public DeviceContext { + public: + explicit GPUDeviceContext(const DeviceContextKey &device_context_key) + : DeviceContext(device_context_key), mem_manager_(nullptr), initialized_(false) {} + ~GPUDeviceContext() override = default; + + // Set device id and initialize device resource, such as stream, cudnn and cublas handle. + bool Initialize() override; + + // Release device memory, stream, cudnn and cublas handle, etc. + void Destroy() override; + + bool AllocateMemory(const DeviceAddressPtr &address, size_t size) const override; + void FreeMemory(const DeviceAddressPtr &address) const override; + bool AllocateContinuousMemory(const DeviceAddressPtrList &addr_list, size_t total_size, + const std::vector &size_list) const override; + + void SetOperatorInfo(const std::vector &nodes) const override; + void CreateKernel(const std::vector &nodes) const override; + bool LaunchKernel(KernelMod *kernel_mod, const std::vector &inputs, + const std::vector &workspace, const std::vector &outputs) const override; + + bool SyncStream(size_t stream_id = 0) override; + + private: + DISABLE_COPY_AND_ASSIGN(GPUDeviceContext); + bool InitDevice(); + + std::shared_ptr mem_manager_; + std::vector streams_; + bool initialized_; +}; +} // namespace gpu +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_CCSRC_RUNTIME_HARDWARE_GPU_GPU_DEVICE_CONTEXT_H_