From 98ce7c2039a79f0781ed0b2b27282e5d6abeec34 Mon Sep 17 00:00:00 2001 From: limingqi107 Date: Mon, 29 Mar 2021 12:23:57 +0800 Subject: [PATCH] add the impl of runtime actors --- mindspore/ccsrc/CMakeLists.txt | 1 + .../ccsrc/runtime/device/device_address.h | 2 + .../ccsrc/runtime/framework/CMakeLists.txt | 8 + .../runtime/framework/actor/actor_common.h | 46 +++++ .../framework/actor/data_source_actor.cc | 163 +++++++++++++++ .../framework/actor/data_source_actor.h | 70 +++++-- .../runtime/framework/actor/kernel_actor.cc | 190 ++++++++++++++++++ .../runtime/framework/actor/kernel_actor.h | 43 ++-- .../framework/actor/loop_count_actor.cc | 47 +++++ .../framework/actor/loop_count_actor.h | 4 +- .../framework/actor/memory_interface_actor.h | 39 ++++ .../framework/actor/memory_manager_actor.cc | 62 ++++++ .../framework/actor/memory_manager_actor.h | 8 +- .../runtime/framework/graph_scheduler.cc | 32 ++- .../ccsrc/runtime/framework/graph_scheduler.h | 11 +- .../hardware/cpu/cpu_device_context.cc | 4 +- .../runtime/hardware/cpu/cpu_device_context.h | 4 +- .../ccsrc/runtime/hardware/device_context.h | 6 +- .../hardware/gpu/gpu_device_context.cc | 36 +++- .../runtime/hardware/gpu/gpu_device_context.h | 6 +- mindspore/core/CMakeLists.txt | 3 + .../core/mindrt/include/actor/op_actor.h | 8 + 22 files changed, 733 insertions(+), 60 deletions(-) create mode 100644 mindspore/ccsrc/runtime/framework/CMakeLists.txt create mode 100644 mindspore/ccsrc/runtime/framework/actor/actor_common.h create mode 100644 mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc create mode 100644 mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc create mode 100644 mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc create mode 100644 mindspore/ccsrc/runtime/framework/actor/memory_interface_actor.h create mode 100644 mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.cc diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 2a8db6b119..8c8f605d2d 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -208,6 +208,7 @@ set(SUB_COMP backend/kernel_compiler backend/session runtime/device + runtime/framework runtime/hardware runtime/hccl_adapter frontend/optimizer diff --git a/mindspore/ccsrc/runtime/device/device_address.h b/mindspore/ccsrc/runtime/device/device_address.h index dedbd7d5e0..0a6d87900b 100644 --- a/mindspore/ccsrc/runtime/device/device_address.h +++ b/mindspore/ccsrc/runtime/device/device_address.h @@ -44,6 +44,7 @@ class TaskGenerator; namespace gpu { class GPUKernelRuntime; class GPUMemoryManager; +class GPUDeviceContext; } // namespace gpu } // namespace device } // namespace mindspore @@ -107,6 +108,7 @@ class DeviceAddress : public mindspore::DeviceSync { friend class mindspore::device::cpu::CPUDeviceContext; friend class mindspore::device::gpu::GPUKernelRuntime; friend class mindspore::device::gpu::GPUMemoryManager; + friend class mindspore::device::gpu::GPUDeviceContext; friend class mindspore::device::ascend::AscendKernelRuntime; friend class mindspore::device::ascend::AscendMemoryManager; friend class mindspore::device::ascend::DataDumper; diff --git a/mindspore/ccsrc/runtime/framework/CMakeLists.txt b/mindspore/ccsrc/runtime/framework/CMakeLists.txt new file mode 100644 index 0000000000..03300454d0 --- /dev/null +++ b/mindspore/ccsrc/runtime/framework/CMakeLists.txt @@ -0,0 +1,8 @@ +include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/include) 
+include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/src) + +file(GLOB_RECURSE FRAMEWORK_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") + +set_property(SOURCE ${FRAMEWORK_SRC_LIST} + PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_RUNTIME_FRAMEWORK) +add_library(_mindspore_runtime_framework_obj OBJECT ${FRAMEWORK_SRC_LIST}) \ No newline at end of file diff --git a/mindspore/ccsrc/runtime/framework/actor/actor_common.h b/mindspore/ccsrc/runtime/framework/actor/actor_common.h new file mode 100644 index 0000000000..8809ebec76 --- /dev/null +++ b/mindspore/ccsrc/runtime/framework/actor/actor_common.h @@ -0,0 +1,46 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_COMMON_H_ +#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_COMMON_H_ + +#include +#include "mindrt/include/actor/op_actor.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace runtime { +// The execution result of actor. +constexpr int kSuccess = 0; +constexpr int kFailure = 1; + +#define SET_OPCONTEXT_FAIL_RET_WITH_ERROR(op_context, message) \ + { \ + MS_LOG(ERROR) << message; \ + op_context.SetFailed(kFailure); \ + return; \ + } + +#define SET_OPCONTEXT_SUCCESS_RET(op_context) \ + { \ + op_context.SetSuccess(kSuccess); \ + return; \ + } + +} // namespace runtime +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_ACTOR_COMMON_H_ diff --git a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc new file mode 100644 index 0000000000..fcbe7ca86c --- /dev/null +++ b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.cc @@ -0,0 +1,163 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/framework/actor/data_source_actor.h" +#include "runtime/framework/actor/kernel_actor.h" +#include "runtime/framework/actor/memory_manager_actor.h" +#include "mindrt/include/async/async.h" +#include "common/trans.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace runtime { +void DataSourceActor::FetchData(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + if (buffers_.size() == buffer_capacity_) { + // Send output to trigger computing and free memory. 
+ SendOutput(context); + FreeMemory(context); + buffers_.pop(); + return; + } + + // Construct device tensors and fill to the buffers from member nodes. + FillDataBuffer(); + if (buffers_.size() == 0) { + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The data queue is empty."); + } + + // Allocate memory for device tensors. + AllocateMemory(context); +} + +void DataSourceActor::AllocateMemory(OpContext *context) { + auto device_tensors = buffers_.back(); + Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, device_tensors, device_context_, context, GetAID()); +} + +void DataSourceActor::FreeMemory(OpContext *context) { + auto device_tensors = buffers_.front(); + Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, device_tensors, device_context_, context); +} + +void DataSourceActor::SendOutput(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + if (buffers_.size() == 0) { + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The data queue is empty."); + } + + // Send output data. + auto output_device_tensors = buffers_.front(); + for (auto &op_arrow : output_op_arrows_) { + MS_EXCEPTION_IF_NULL(op_arrow); + if (IntToSize(op_arrow->from_output_index_) >= output_device_tensors.size()) { + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The output index is of range."); + } + auto device_address = output_device_tensors[op_arrow->from_output_index_]; + auto data = std::make_shared>(op_arrow->to_op_id_, device_address, op_arrow->to_input_index_); + Async(op_arrow->to_op_id_, &KernelActor::RunOpData, data, context); + } +} + +void DeviceQueueDataSourceActor::FillDataBuffer() { + // Construct device tensors. + std::vector device_tensors; + for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(data_kernel_); ++i) { + auto device_address = AnfAlgo::GetMutableOutputAddr(data_kernel_, i, false); + MS_EXCEPTION_IF_NULL(device_address); + device_tensors.emplace_back(device_address.get()); + } + + buffers_.push(device_tensors); +} + +void DeviceQueueDataSourceActor::OnMemoryAllocFinish(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + MS_EXCEPTION_IF_NULL(device_context_); + if (buffers_.size() == 0) { + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The data queue is empty."); + } + + // Construct outputs of data kernel launching. + auto device_tensors = buffers_.back(); + std::vector kernel_outputs; + for (auto &device_tensor : device_tensors) { + MS_EXCEPTION_IF_NULL(device_tensor); + kernel_outputs.emplace_back(std::make_shared
(device_tensor->GetMutablePtr(), device_tensor->GetSize())); + } + + // Copy data from device queue by data kernel launching. + std::vector empty_address; + auto kernel_mod = AnfAlgo::GetKernelMod(data_kernel_); + auto ret = device_context_->LaunchKernel(kernel_mod, empty_address, empty_address, kernel_outputs); + if (!ret) { + std::string error_info = "Launch kernel failed: " + data_kernel_->ToString(); + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); + } + + // Send output to trigger computing and free memory. + SendOutput(context); + FreeMemory(context); + buffers_.pop(); +} + +void HostQueueDataSourceActor::FillDataBuffer() { + // Construct device tensors. + std::vector device_tensors; + for (auto &data_node : data_nodes_) { + auto device_address = AnfAlgo::GetMutableOutputAddr(data_node, 0, false); + MS_EXCEPTION_IF_NULL(device_address); + device_tensors.emplace_back(device_address.get()); + } + + buffers_.push(device_tensors); +} + +void HostQueueDataSourceActor::OnMemoryAllocFinish(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + if (buffers_.size() == 0) { + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The data queue is empty."); + } + + // Get host tensors from host queue and get device tensors from buffers. + MS_EXCEPTION_IF_NULL(host_queue_); + auto host_tensors = host_queue_->PullData(); + auto device_tensors = buffers_.back(); + if (host_tensors.size() != device_tensors.size()) { + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), + "The length of host tensors is not equal to the length of device tensors."); + } + + // Copy data from host tensor to device tensor. + for (size_t i = 0; i < host_tensors.size(); ++i) { + auto host_tensor = host_tensors[i]; + auto device_tensor = device_tensors[i]; + MS_EXCEPTION_IF_NULL(host_tensor); + MS_EXCEPTION_IF_NULL(device_tensor); + if (!device_tensor->SyncHostToDevice(trans::GetRuntimePaddingShape(data_nodes_[i], 0), + LongToSize(host_tensor->data().nbytes()), host_tensor->data_type(), + host_tensor->data_c())) { + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "SyncHostToDevice failed."); + } + } + + // Send output to trigger computing and free memory. + SendOutput(context); + FreeMemory(context); + buffers_.pop(); +} +} // namespace runtime +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h index 6183da58d9..7e6962efaf 100644 --- a/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h +++ b/mindspore/ccsrc/runtime/framework/actor/data_source_actor.h @@ -22,43 +22,73 @@ #include #include #include -#include "mindrt/include/actor/op_actor.h" -#include "mindrt/include/async/future.h" +#include +#include "runtime/framework/actor/actor_common.h" +#include "runtime/framework/actor/memory_interface_actor.h" +#include "runtime/hardware/device_context.h" #include "runtime/framework/device_tensor_store.h" #include "runtime/framework/host_tensor_queue.h" #include "base/base.h" namespace mindspore { namespace runtime { -// The data source actor is used to fetch data and process them into device tensors, -// and then send them to kernel actor. -class DataSourceActor : public ActorBase { +using mindspore::device::DeviceContext; + +// The data source actor is used to fetch data from data source and process them into device tensors, +// and then send them to kernel actor. The processing flow is FetchData -> FillDataBuffer -> AllocateMemory +// -> OnMemoryAllocFinish -> SendOutput -> FreeMemory. 
+class DataSourceActor : public MemoryInterfaceActor { public: - DataSourceActor(std::string name, size_t buffer_capacity) : ActorBase(name), buffer_capacity_(buffer_capacity) {} + DataSourceActor(std::string name, size_t buffer_capacity, const DeviceContext *device_context, + const AID memory_manager_aid) + : MemoryInterfaceActor(name), + buffer_capacity_(buffer_capacity), + device_context_(device_context), + memory_manager_aid_(memory_manager_aid) {} virtual ~DataSourceActor() = default; // The process entry of data processing. - virtual void FetchData(OpContext *context) = 0; + void FetchData(OpContext *context); + + // The memory related operation interface. + void AllocateMemory(OpContext *context) override; + void FreeMemory(OpContext *context) override; + // Copy data from data source to the device tensor buffer of actor after memory alloc finished. + void OnMemoryAllocFinish(OpContext *context) override{}; protected: + // Construct the device tensors and fill to device tensor buffer from the member nodes during the data fetching. + virtual void FillDataBuffer() = 0; + + // Send output to downstream actors to trigger computing after fetching data finished. + void SendOutput(OpContext *context); + // To trigger kernel actors running by op arrows. std::vector output_op_arrows_; - // The buffers store the data. - std::queue> buffers_; + // The buffers store the device tensors. + std::queue> buffers_; size_t buffer_capacity_; - // The sequential number of corresponding batch data. - std::queue sequential_nums_; + // The device interface of data copy. + const DeviceContext *device_context_; + + // The id of memory manager actor. Send message to it for alloc and free memory during the data processing. + const AID memory_manager_aid_; }; // The class represents that the data source is device queue. class DeviceQueueDataSourceActor : public DataSourceActor { public: - DeviceQueueDataSourceActor(std::string name, size_t buffer_capacity) : DataSourceActor(name, buffer_capacity) {} - virtual ~DeviceQueueDataSourceActor() = default; + DeviceQueueDataSourceActor(std::string name, size_t buffer_capacity, const DeviceContext *device_context, + const AID memory_manager_aid) + : DataSourceActor(name, buffer_capacity, device_context, memory_manager_aid) {} + ~DeviceQueueDataSourceActor() override = default; - void FetchData(OpContext *context) override; + void OnMemoryAllocFinish(OpContext *context) override; + + protected: + void FillDataBuffer() override; private: friend class GraphScheduler; @@ -70,11 +100,15 @@ class DeviceQueueDataSourceActor : public DataSourceActor { // The class represents that the data source is host queue. 
class HostQueueDataSourceActor : public DataSourceActor { public: - HostQueueDataSourceActor(std::string name, size_t buffer_capacity, HostTensorQueuePtr host_queue) - : DataSourceActor(name, buffer_capacity), host_queue_(host_queue) {} - virtual ~HostQueueDataSourceActor() = default; + HostQueueDataSourceActor(std::string name, size_t buffer_capacity, const DeviceContext *device_context, + const AID memory_manager_aid, HostTensorQueuePtr host_queue) + : DataSourceActor(name, buffer_capacity, device_context, memory_manager_aid), host_queue_(host_queue) {} + ~HostQueueDataSourceActor() override = default; - void FetchData(OpContext *context) override; + void OnMemoryAllocFinish(OpContext *context) override; + + protected: + void FillDataBuffer() override; private: friend class GraphScheduler; diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc new file mode 100644 index 0000000000..939b5a2bd6 --- /dev/null +++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc @@ -0,0 +1,190 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/framework/actor/kernel_actor.h" +#include "runtime/framework/actor/memory_manager_actor.h" +#include "mindrt/include/async/async.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace runtime { +void KernelActor::RunOpData(OpDataPtr input_data, OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + auto sequential_num = context->sequential_num_; + input_op_datas_[sequential_num].emplace_back(input_data); + // When all the input data are collected, then allocate memory and callback launch. + if (CheckLaunchCondition(context)) { + FetchInputDeviceTensor(context); + FetchOutputDeviceTensor(); + FetchWorkspaceDeviceTensor(); + AllocateMemory(context); + } +} + +void KernelActor::RunOpControl(AID *input_control, OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + auto sequential_num = context->sequential_num_; + input_op_controls_[sequential_num].emplace_back(input_control); + // When all the input data are collected, then allocate memory and callback launch. 
+ if (CheckLaunchCondition(context)) { + FetchInputDeviceTensor(context); + FetchOutputDeviceTensor(); + FetchWorkspaceDeviceTensor(); + AllocateMemory(context); + } +} + +void KernelActor::AllocateMemory(OpContext *context) { + std::vector alloc_list(output_device_tensors_); + alloc_list.insert(alloc_list.end(), workspace_device_tensors_.begin(), workspace_device_tensors_.end()); + Async(memory_manager_aid_, &MemoryManagerActor::AllocateMemory, alloc_list, device_context_, context, GetAID()); +} + +void KernelActor::FreeMemory(OpContext *context) { + std::vector free_list(input_device_tensors_); + free_list.insert(free_list.end(), output_device_tensors_.begin(), output_device_tensors_.end()); + free_list.insert(free_list.end(), workspace_device_tensors_.begin(), workspace_device_tensors_.end()); + Async(memory_manager_aid_, &MemoryManagerActor::FreeMemory, free_list, device_context_, context); +} + +void KernelActor::OnMemoryAllocFinish(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + MS_EXCEPTION_IF_NULL(kernel_); + auto kernel_mod = AnfAlgo::GetKernelMod(kernel_); + std::vector kernel_inputs; + std::vector kernel_outputs; + std::vector kernel_workspaces; + FetchLaunchArgs(&kernel_inputs, &kernel_outputs, &kernel_workspaces); + MS_EXCEPTION_IF_NULL(device_context_); + auto ret = device_context_->LaunchKernel(kernel_mod, kernel_inputs, kernel_workspaces, kernel_outputs); + if (!ret) { + std::string error_info = "Launch kernel failed: " + kernel_->ToString(); + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); + } + SendOutput(context); + FreeMemory(context); +} + +bool KernelActor::CheckLaunchCondition(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + if (input_datas_num_ != 0) { + auto data_iter = input_op_datas_.find(context->sequential_num_); + if (data_iter == input_op_datas_.end()) { + return false; + } + if (data_iter->second.size() != input_datas_num_) { + return false; + } + } + + if (input_controls_num_ != 0) { + auto control_iter = input_op_controls_.find(context->sequential_num_); + if (control_iter == input_op_controls_.end()) { + return false; + } + if (control_iter->second.size() != input_controls_num_) { + return false; + } + } + return true; +} + +void KernelActor::FetchInputDeviceTensor(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + auto input_size = input_datas_num_ + device_tensor_store_keys_.size(); + input_device_tensors_.resize(input_size); + + auto data_iter = input_op_datas_.find(context->sequential_num_); + if (data_iter != input_op_datas_.end()) { + for (auto &input_data : data_iter->second) { + MS_EXCEPTION_IF_NULL(input_data); + input_device_tensors_[input_data->index_] = input_data->data_; + } + } + + for (auto &device_tensor_store_key : device_tensor_store_keys_) { + auto device_tensor = DeviceTensorStore::GetInstance().Fetch(device_tensor_store_key.second); + input_device_tensors_[device_tensor_store_key.first] = device_tensor.get(); + } +} + +void KernelActor::FetchOutputDeviceTensor() { + output_device_tensors_.clear(); + for (size_t i = 0; i < AnfAlgo::GetOutputTensorNum(kernel_); ++i) { + auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_, i, false); + MS_EXCEPTION_IF_NULL(device_address); + output_device_tensors_.emplace_back(device_address.get()); + } +} + +void KernelActor::FetchWorkspaceDeviceTensor() { + workspace_device_tensors_.clear(); + auto kernel_mod = AnfAlgo::GetKernelMod(kernel_); + MS_EXCEPTION_IF_NULL(kernel_mod); + auto workspace_sizes = kernel_mod->GetWorkspaceSizeList(); + for (size_t i = 0; i 
< workspace_sizes.size(); ++i) { + if (workspace_sizes[i] != 0) { + auto device_address = AnfAlgo::GetMutableWorkspaceAddr(kernel_, i); + MS_EXCEPTION_IF_NULL(device_address); + workspace_device_tensors_.emplace_back(device_address.get()); + } + } +} + +void KernelActor::FetchLaunchArgs(std::vector *kernel_inputs, std::vector *kernel_outputs, + std::vector *kernel_workspaces) { + MS_EXCEPTION_IF_NULL(kernel_inputs); + MS_EXCEPTION_IF_NULL(kernel_outputs); + MS_EXCEPTION_IF_NULL(kernel_workspaces); + for (auto &input : input_device_tensors_) { + MS_EXCEPTION_IF_NULL(input); + kernel_inputs->emplace_back(std::make_shared
(input->GetMutablePtr(), input->GetSize())); + } + + for (auto &output : output_device_tensors_) { + MS_EXCEPTION_IF_NULL(output); + kernel_outputs->emplace_back(std::make_shared
(output->GetMutablePtr(), output->GetSize())); + } + + for (auto &workspace : workspace_device_tensors_) { + MS_EXCEPTION_IF_NULL(workspace); + kernel_workspaces->emplace_back(std::make_shared
(workspace->GetMutablePtr(), workspace->GetSize())); + } +} + +void KernelActor::SendOutput(OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + // Send output data. + for (auto &op_arrow : output_op_arrows_) { + MS_EXCEPTION_IF_NULL(op_arrow); + if (IntToSize(op_arrow->from_output_index_) >= output_device_tensors_.size()) { + std::string error_info = "The output index is out of range: " + kernel_->ToString(); + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), error_info); + } + auto device_address = output_device_tensors_[op_arrow->from_output_index_]; + auto data = std::make_shared>(op_arrow->to_op_id_, device_address, op_arrow->to_input_index_); + Async(op_arrow->to_op_id_, &KernelActor::RunOpData, data, context); + } + + // Send output control. + auto source_aid = const_cast(&GetAID()); + for (auto &output_control : output_op_controls_) { + Async(output_control, &OpActor::RunOpControl, source_aid, context); + } +} + +} // namespace runtime +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h index aa9b1a25f7..630d250dbb 100644 --- a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h +++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.h @@ -22,7 +22,8 @@ #include #include #include -#include "mindrt/include/actor/op_actor.h" +#include "runtime/framework/actor/actor_common.h" +#include "runtime/framework/actor/memory_interface_actor.h" #include "runtime/hardware/device_context.h" #include "runtime/framework/device_tensor_store.h" #include "backend/kernel_compiler/kernel.h" @@ -31,38 +32,47 @@ namespace mindspore { namespace runtime { using mindspore::device::DeviceContext; +using mindspore::kernel::Address; using mindspore::kernel::AddressPtr; // The kernel actor is used to receive the device tensors and control info to luanch kernel. -class KernelActor : public OpActor { +// The processing flow is RunOpData/RunOpControl -> CheckLaunchCondition -> AllocateMemory +// -> OnMemoryAllocFinish -> LaunchKernel -> SendOutput -> FreeMemory. +class KernelActor : public MemoryInterfaceActor { public: - KernelActor(std::string name, CNodePtr kernel, const DeviceContext *device_context) - : OpActor(name), kernel_(kernel), device_context_(device_context), input_datas_num_(0), input_controls_num_(0) {} - virtual ~KernelActor() = default; + KernelActor(std::string name, CNodePtr kernel, const DeviceContext *device_context, const AID memory_manager_aid) + : MemoryInterfaceActor(name), + kernel_(kernel), + device_context_(device_context), + memory_manager_aid_(memory_manager_aid), + input_datas_num_(0), + input_controls_num_(0) {} + ~KernelActor() override = default; // The kernel actor run when receive the input data. void RunOpData(OpDataPtr input_data, OpContext *context) override; // The kernel actor run when receive the input control. void RunOpControl(AID *input_control, OpContext *context) override; + // The memory related operation interface. + void AllocateMemory(OpContext *context) override; + void FreeMemory(OpContext *context) override; + // The real kernel launch processing after memory alloc finished. + void OnMemoryAllocFinish(OpContext *context) override; + private: friend class GraphScheduler; // Check whether satisfy the condition for launch. - bool CheckLaunchCondition(const uuids::uuid *sequential_num); + bool CheckLaunchCondition(OpContext *context); // Fetch the args of kernel launch. 
void FetchLaunchArgs(std::vector *kernel_inputs, std::vector *kernel_outputs, std::vector *kernel_workspaces); - // The real kernel launch processing. - void Launch(OpContext *context); // Send output data and output controls when finish kernel launch. void SendOutput(OpContext *context); - void AllocateMemory(OpContext *context); - void FreeMemory(OpContext *context); - // Fetch the device tensor for launch. - void FetchInputDeviceTensor(const uuids::uuid *sequential_num); + void FetchInputDeviceTensor(OpContext *context); void FetchOutputDeviceTensor(); void FetchWorkspaceDeviceTensor(); @@ -70,6 +80,9 @@ class KernelActor : public OpActor { // The device interface of kernel launch. const DeviceContext *device_context_; + // The id of memory manager actor. Send message to it for alloc and free memory during the kernel launch. + const AID memory_manager_aid_; + // The dependent input data number. size_t input_datas_num_; // The dependent input controls number. @@ -79,9 +92,9 @@ class KernelActor : public OpActor { std::vector> device_tensor_store_keys_; // The device tensors for launch. - std::vector input_device_tensors_; - std::vector output_device_tensors_; - std::vector workspace_device_tensors_; + std::vector input_device_tensors_; + std::vector output_device_tensors_; + std::vector workspace_device_tensors_; }; using KernelActorPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc new file mode 100644 index 0000000000..8daf2c1f8a --- /dev/null +++ b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.cc @@ -0,0 +1,47 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/framework/actor/loop_count_actor.h" +#include "runtime/framework/actor/data_source_actor.h" +#include "runtime/framework/actor/kernel_actor.h" +#include "mindrt/include/async/async.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace runtime { +void LoopCountActor::RunOpControl(AID *input_control, OpContext *context) { + MS_EXCEPTION_IF_NULL(context); + auto sequential_num = context->sequential_num_; + input_op_controls_[sequential_num].emplace_back(input_control); + if (input_op_controls_[sequential_num].size() == input_controls_num_) { + current_count_++; + if (current_count_ == loop_count_) { + current_count_ = 0; + SET_OPCONTEXT_SUCCESS_RET((*context)); + } + + // Send output control. 
+ for (auto &data_source_aid : data_source_aids_) { + Async(data_source_aid, &DataSourceActor::FetchData, context); + } + auto source_aid = const_cast(&GetAID()); + for (auto &kernel_aid : no_input_kernel_aids_) { + Async(kernel_aid, &KernelActor::RunOpControl, source_aid, context); + } + } +} +} // namespace runtime +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h index fe922396d8..27b163f78e 100644 --- a/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h +++ b/mindspore/ccsrc/runtime/framework/actor/loop_count_actor.h @@ -21,7 +21,7 @@ #include #include #include -#include "mindrt/include/actor/op_actor.h" +#include "runtime/framework/actor/actor_common.h" #include "runtime/framework/device_tensor_store.h" namespace mindspore { @@ -32,7 +32,7 @@ class LoopCountActor : public OpActor { public: LoopCountActor(std::string name, size_t loop_count) : OpActor(name), loop_count_(loop_count), current_count_(0), input_controls_num_(0) {} - virtual ~LoopCountActor() = default; + ~LoopCountActor() override = default; // The loop count actor run when receive the input control. void RunOpControl(AID *input_control, OpContext *context) override; diff --git a/mindspore/ccsrc/runtime/framework/actor/memory_interface_actor.h b/mindspore/ccsrc/runtime/framework/actor/memory_interface_actor.h new file mode 100644 index 0000000000..114b0622d6 --- /dev/null +++ b/mindspore/ccsrc/runtime/framework/actor/memory_interface_actor.h @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ +#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ + +#include +#include +#include "mindrt/include/actor/op_actor.h" +#include "runtime/framework/device_tensor_store.h" + +namespace mindspore { +namespace runtime { +// The actor represents a set of common memory related operations of actor. +class MemoryInterfaceActor : public OpActor { + public: + explicit MemoryInterfaceActor(std::string name) : OpActor(name) {} + virtual ~MemoryInterfaceActor() = default; + virtual void AllocateMemory(OpContext *context) = 0; + virtual void FreeMemory(OpContext *context) = 0; + virtual void OnMemoryAllocFinish(OpContext *context) = 0; +}; +} // namespace runtime +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_MEMORY_INTERFACE_ACTOR_H_ diff --git a/mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.cc b/mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.cc new file mode 100644 index 0000000000..685995f01e --- /dev/null +++ b/mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.cc @@ -0,0 +1,62 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "runtime/framework/actor/memory_manager_actor.h" +#include "runtime/framework/actor/data_source_actor.h" +#include "runtime/framework/actor/kernel_actor.h" +#include "mindrt/include/async/async.h" +#include "utils/log_adapter.h" + +namespace mindspore { +namespace runtime { +void MemoryManagerActor::AllocateMemory(std::vector alloc_list, const DeviceContext *device_context, + OpContext *op_context, const AID from_aid) { + MS_EXCEPTION_IF_NULL(device_context); + MS_EXCEPTION_IF_NULL(op_context); + + for (auto &device_tensor : alloc_list) { + MS_EXCEPTION_IF_NULL(device_tensor); + if (device_tensor->GetPtr() != nullptr) { + continue; + } + // Allocate memory through the device context. + if (!device_context->AllocateMemory(device_tensor, device_tensor->GetSize())) { + std::string error_info = "Device memory isn't enough and alloc failed, actor name: " + from_aid.Name() + + ", alloc size: " + std::to_string(device_tensor->GetSize()); + SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*op_context), error_info); + } + } + + // Call back to the from actor to process after memory allocation finished. + Async(from_aid, &MemoryInterfaceActor::OnMemoryAllocFinish, op_context); +} + +void MemoryManagerActor::FreeMemory(std::vector free_list, const DeviceContext *device_context, + OpContext *) { + MS_EXCEPTION_IF_NULL(device_context); + for (auto &device_tensor : free_list) { + MS_EXCEPTION_IF_NULL(device_tensor); + // The reference count is decremented to zero to free memory, and reset to the original count. + device_tensor->DecreaseRefCountUsed(); + if (device_tensor->ref_count_dynamic_used() == 0) { + // Free memory through the device context. + device_context->FreeMemory(device_tensor); + device_tensor->ResetRefCountUsed(); + } + } +} +} // namespace runtime +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.h b/mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.h index 9c9ab16318..3d4adccaef 100644 --- a/mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.h +++ b/mindspore/ccsrc/runtime/framework/actor/memory_manager_actor.h @@ -21,7 +21,7 @@ #include #include #include -#include "mindrt/include/actor/op_actor.h" +#include "runtime/framework/actor/actor_common.h" #include "runtime/framework/device_tensor_store.h" #include "runtime/hardware/device_context.h" @@ -36,10 +36,10 @@ class MemoryManagerActor : public ActorBase { ~MemoryManagerActor() override = default; // The process entry of memory alloc. - bool AllocateMemory(std::vector alloc_list, const DeviceContext *device_context, - OpContext *op_context); + void AllocateMemory(std::vector alloc_list, const DeviceContext *device_context, + OpContext *op_context, const AID from_aid); // The process entry of memory free. 
- void FreeMemory(std::vector free_list, const DeviceContext *device_context, + void FreeMemory(std::vector free_list, const DeviceContext *device_context, OpContext *op_context); }; } // namespace runtime diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc index 75b009798a..93260f9940 100644 --- a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc +++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc @@ -15,6 +15,7 @@ */ #include "runtime/framework/graph_scheduler.h" +#include "runtime/framework/actor/memory_manager_actor.h" #include "mindrt/src/actor/actormgr.h" #include "mindrt/include/async/async.h" #include "backend/session/anf_runtime_algorithm.h" @@ -102,6 +103,23 @@ void UpdateRefCount(const AnfNodePtr &node, size_t output_idx) { } } // namespace +void GraphScheduler::Initialize() { + if (init_) { + return; + } + init_ = true; + + // Create memory manager actor. + auto memory_manager_actor = std::make_shared(); + MS_EXCEPTION_IF_NULL(memory_manager_actor); + memory_manager_aid_ = memory_manager_actor->GetAID(); + // Schedule memory manager actor, bind single thread to response to memory alloc and free quickly. + auto base_actor = static_cast(memory_manager_actor); + auto actorMgr = ActorMgr::GetActorMgrRef(); + MS_EXCEPTION_IF_NULL(actorMgr); + (void)actorMgr->Spawn(base_actor, false); +} + ActorSet *GraphScheduler::Transform(const KernelGraphPtr &graph, const DeviceContext *device_context, const std::vector *input_tensors, GraphExecutionStrategy strategy) { @@ -191,7 +209,7 @@ ActorSetPtr GraphScheduler::Build(const KernelGraphPtr &graph, const DeviceConte auto actor_set = std::make_shared(); MS_EXCEPTION_IF_NULL(actor_set); - auto data_source_actors = BuildDataSourceActor(graph); + auto data_source_actors = BuildDataSourceActor(graph, device_context); actor_set->data_source_actors_.swap(data_source_actors); auto kernel_actors = BuildKernelActor(graph, device_context); @@ -251,7 +269,8 @@ void GraphScheduler::Link(ActorSet *actor_set, const KernelGraphPtr &graph, Grap LinkControlArrowForLoopCountActor(actor_set->loop_count_actor_.get(), graph); } -std::vector GraphScheduler::BuildDataSourceActor(const KernelGraphPtr &graph) { +std::vector GraphScheduler::BuildDataSourceActor(const KernelGraphPtr &graph, + const DeviceContext *device_context) { MS_EXCEPTION_IF_NULL(graph); std::vector data_source_actors; @@ -265,7 +284,8 @@ std::vector GraphScheduler::BuildDataSourceActor(const Kerne MS_LOG(INFO) << "Create host queue data source actor: " << actor_name; auto host_queue = std::make_shared(); graph_to_host_queue_.emplace(graph, host_queue); - host_queue_ds_actor = std::make_shared(actor_name, 1, host_queue); + host_queue_ds_actor = + std::make_shared(actor_name, 1, device_context, memory_manager_aid_, host_queue); data_source_actors.emplace_back(host_queue_ds_actor); } host_queue_ds_actor->data_nodes_.emplace_back(input_node); @@ -279,7 +299,8 @@ std::vector GraphScheduler::BuildDataSourceActor(const Kerne if (iter != execution_order.end()) { auto actor_name = graph->ToString() + "_" + "DeviceQueueDataSourceActor"; MS_LOG(INFO) << "Create queue data source actor: " << actor_name; - auto device_queue_ds_actor = std::make_shared(actor_name, 1); + auto device_queue_ds_actor = + std::make_shared(actor_name, 1, device_context, memory_manager_aid_); MS_EXCEPTION_IF_NULL(device_queue_ds_actor); data_source_actors.emplace_back(device_queue_ds_actor); device_queue_ds_actor->data_kernel_ = *iter; @@ -295,7 +316,8 @@ 
std::vector GraphScheduler::BuildKernelActor(const KernelGraphPt auto execution_order = graph->execution_order(); for (auto &kernel : execution_order) { if (IsKernelActor(kernel)) { - auto kernel_actor = std::make_shared(kernel->fullname_with_scope(), kernel, device_context); + auto kernel_actor = + std::make_shared(kernel->fullname_with_scope(), kernel, device_context, memory_manager_aid_); MS_EXCEPTION_IF_NULL(kernel_actor); kernel_actors.emplace_back(kernel_actor); } diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.h b/mindspore/ccsrc/runtime/framework/graph_scheduler.h index 0aa7bad6c6..d3927d3392 100644 --- a/mindspore/ccsrc/runtime/framework/graph_scheduler.h +++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.h @@ -61,6 +61,9 @@ class GraphScheduler { return instance; } + // The memory manager creating and scheduling. + void Initialize(); + // Transform graph to actor DAG, contains build and link. ActorSet *Transform(const KernelGraphPtr &graph, const DeviceContext *device_context, const std::vector *input_tensors = nullptr, @@ -87,7 +90,8 @@ class GraphScheduler { void Link(ActorSet *actor_set, const KernelGraphPtr &graph, GraphExecutionStrategy strategy); // The processing of actors build. - std::vector BuildDataSourceActor(const KernelGraphPtr &graph); + std::vector BuildDataSourceActor(const KernelGraphPtr &graph, + const DeviceContext *device_context); std::vector BuildKernelActor(const KernelGraphPtr &graph, const DeviceContext *device_context); std::vector BuildNoInputKernelActor(const KernelGraphPtr &graph); LoopCountActorPtr BuildLoopCountActor(const KernelGraphPtr &graph); @@ -114,6 +118,11 @@ class GraphScheduler { // The second element of pair represents the output index of kernel actor corresponding to the device tensor. std::unordered_map> device_address_to_actor_; + + // The id of memory manager actor. 
+ AID memory_manager_aid_; + + bool init_{false}; }; } // namespace runtime } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc index 1c87120e14..b181408211 100644 --- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc @@ -35,12 +35,12 @@ bool CPUDeviceContext::Initialize() { return true; } -bool CPUDeviceContext::AllocateMemory(const DeviceAddressPtr &address, size_t size) const { +bool CPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const { address->ptr_ = static_cast(mem_manager_.get())->StaticMemMalloc(size); return true; } -void CPUDeviceContext::FreeMemory(const DeviceAddressPtr &address) const { +void CPUDeviceContext::FreeMemory(DeviceAddress *const &address) const { static_cast(mem_manager_.get())->MemFree(address->ptr_); address->ptr_ = nullptr; } diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h index 835705272e..9d9578f122 100644 --- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h @@ -33,8 +33,8 @@ class CPUDeviceContext : public DeviceContext { bool Initialize() override; - bool AllocateMemory(const DeviceAddressPtr &address, size_t size) const override; - void FreeMemory(const DeviceAddressPtr &address) const override; + bool AllocateMemory(DeviceAddress *const &address, size_t size) const override; + void FreeMemory(DeviceAddress *const &address) const override; void SetOperatorInfo(const std::vector &nodes) const override; void CreateKernel(const std::vector &nodes) const override; diff --git a/mindspore/ccsrc/runtime/hardware/device_context.h b/mindspore/ccsrc/runtime/hardware/device_context.h index 893677f9db..cad5c43940 100644 --- a/mindspore/ccsrc/runtime/hardware/device_context.h +++ b/mindspore/ccsrc/runtime/hardware/device_context.h @@ -52,13 +52,13 @@ class DeviceContext { virtual void Destroy() {} // Relevant function to allocate and free device memory. - virtual bool AllocateMemory(const DeviceAddressPtr &address, size_t size) const = 0; - virtual void FreeMemory(const DeviceAddressPtr &address) const = 0; + virtual bool AllocateMemory(DeviceAddress *const &address, size_t size) const = 0; + virtual void FreeMemory(DeviceAddress *const &address) const = 0; // Allocate continuous device memory end to end into 'addr_list'. // Communication operators may need continuous memory for input and output // to optimize the communication performance. 
- virtual bool AllocateContinuousMemory(const DeviceAddressPtrList &addr_list, size_t total_size, + virtual bool AllocateContinuousMemory(const std::vector &addr_list, size_t total_size, const std::vector &size_list) const { return true; } diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc index 6f8f3dbf03..c0ba9a11c1 100644 --- a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc +++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc @@ -115,15 +115,41 @@ void GPUDeviceContext::Destroy() { } } -bool GPUDeviceContext::AllocateMemory(const DeviceAddressPtr &address, size_t size) const { - return mem_manager_->MallocMemFromMemPool(address, size); +bool GPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const { + MS_EXCEPTION_IF_NULL(address); + auto device_ptr = mem_manager_->MallocMemFromMemPool(size); + if (!device_ptr) { + return false; + } + address->ptr_ = device_ptr; + address->size_ = size; + address->from_mem_pool_ = true; + return true; } -void GPUDeviceContext::FreeMemory(const DeviceAddressPtr &address) const { mem_manager_->FreeMemFromMemPool(address); } +void GPUDeviceContext::FreeMemory(DeviceAddress *const &address) const { + MS_EXCEPTION_IF_NULL(address); + MS_EXCEPTION_IF_NULL(address->ptr_); + mem_manager_->FreeMemFromMemPool(address->ptr_); + address->ptr_ = nullptr; +} -bool GPUDeviceContext::AllocateContinuousMemory(const DeviceAddressPtrList &addr_list, size_t total_size, +bool GPUDeviceContext::AllocateContinuousMemory(const std::vector &addr_list, size_t total_size, const std::vector &size_list) const { - return mem_manager_->MallocContinuousMemFromMemPool(addr_list, total_size, size_list); + auto device_ptr_list = mem_manager_->MallocContinuousMemFromMemPool(total_size, size_list); + if (device_ptr_list.size() == 0) { + return false; + } + if (addr_list.size() != device_ptr_list.size()) { + MS_LOG(EXCEPTION) << "The size of device list is not equal to the size of address list."; + } + for (size_t i = 0; i < addr_list.size(); i++) { + MS_EXCEPTION_IF_NULL(device_ptr_list[i]); + MS_EXCEPTION_IF_NULL(addr_list[i]); + addr_list[i]->ptr_ = device_ptr_list[i]; + addr_list[i]->from_mem_pool_ = true; + } + return true; } void GPUDeviceContext::SetOperatorInfo(const std::vector &nodes) const { diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h index 4b61a97462..0b99418d9e 100644 --- a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h +++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.h @@ -38,9 +38,9 @@ class GPUDeviceContext : public DeviceContext { // Release device memory, stream, cudnn and cublas handle, etc. 
void Destroy() override; - bool AllocateMemory(const DeviceAddressPtr &address, size_t size) const override; - void FreeMemory(const DeviceAddressPtr &address) const override; - bool AllocateContinuousMemory(const DeviceAddressPtrList &addr_list, size_t total_size, + bool AllocateMemory(DeviceAddress *const &address, size_t size) const override; + void FreeMemory(DeviceAddress *const &address) const override; + bool AllocateContinuousMemory(const std::vector &addr_list, size_t total_size, const std::vector &size_list) const override; void SetOperatorInfo(const std::vector &nodes) const override; diff --git a/mindspore/core/CMakeLists.txt b/mindspore/core/CMakeLists.txt index e14f5588b3..2fd8c28f90 100644 --- a/mindspore/core/CMakeLists.txt +++ b/mindspore/core/CMakeLists.txt @@ -33,6 +33,9 @@ endif() set_property(SOURCE ${CORE_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_CORE) add_library(mindspore_core STATIC ${CORE_SRC_LIST}) target_link_libraries(mindspore_core PRIVATE mindspore_gvar) +if(NOT(COMPILE_LITE)) + target_link_libraries(mindspore_core PRIVATE mindrt_mid) +endif() if(USE_GLOG) target_link_libraries(mindspore_core PRIVATE mindspore::glog) diff --git a/mindspore/core/mindrt/include/actor/op_actor.h b/mindspore/core/mindrt/include/actor/op_actor.h index 309cb35493..f6685ae1ca 100644 --- a/mindspore/core/mindrt/include/actor/op_actor.h +++ b/mindspore/core/mindrt/include/actor/op_actor.h @@ -58,11 +58,19 @@ struct OpContext { std::vector> *results_; const void *kernel_call_back_before_; const void *kernel_call_back_after_; + void SetFailed(int32_t code) { for (auto promise : *results_) { promise.SetFailed(code); } } + + void SetSuccess(int32_t code) { + for (auto promise : *results_) { + promise.SetValue(code); + } + } + void SetResult(size_t index, int value) { results_->at(index).SetValue(value); } };
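
Note (not part of the patch): the core control flow this patch introduces is that a KernelActor asks the MemoryManagerActor to allocate its output/workspace device tensors, gets called back via OnMemoryAllocFinish, launches the kernel, sends outputs downstream, and then requests FreeMemory. Below is a minimal, self-contained sketch of that allocate -> launch -> free handshake. It is an assumption-laden illustration only: DeviceTensor, DeviceContext, MemoryManagerActor, and ToyKernelActor here are simplified stand-ins, and direct calls replace the asynchronous Async() message passing used by the real mindrt actors.

```cpp
#include <cstddef>
#include <cstdio>
#include <new>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-ins for the real DeviceTensor / DeviceContext types (hypothetical).
struct DeviceTensor {
  void *ptr{nullptr};
  std::size_t size{0};
};

struct DeviceContext {
  bool AllocateMemory(DeviceTensor *t, std::size_t size) const {
    t->ptr = ::operator new(size);
    t->size = size;
    return true;
  }
  void FreeMemory(DeviceTensor *t) const {
    ::operator delete(t->ptr);
    t->ptr = nullptr;
  }
};

// Toy "memory manager actor": handles allocation requests and notifies the
// requester, mirroring the AllocateMemory -> OnMemoryAllocFinish callback.
class MemoryManagerActor {
 public:
  template <typename Requester>
  void AllocateMemory(const std::vector<DeviceTensor *> &alloc_list, const DeviceContext *ctx,
                      Requester *from) {
    for (auto *t : alloc_list) {
      if (t->ptr == nullptr) {
        ctx->AllocateMemory(t, t->size == 0 ? 16 : t->size);
      }
    }
    from->OnMemoryAllocFinish();  // callback to the requesting actor
  }
  void FreeMemory(const std::vector<DeviceTensor *> &free_list, const DeviceContext *ctx) {
    for (auto *t : free_list) {
      ctx->FreeMemory(t);
    }
  }
};

// Toy kernel actor: allocate -> launch -> send output -> free.
class ToyKernelActor {
 public:
  ToyKernelActor(std::string name, DeviceContext *ctx, MemoryManagerActor *mem)
      : name_(std::move(name)), ctx_(ctx), mem_(mem) {}

  void Run() {
    outputs_ = {&out_};
    // Ask the memory manager for output memory; launching waits for the callback.
    mem_->AllocateMemory(outputs_, ctx_, this);
  }

  void OnMemoryAllocFinish() {
    std::printf("[%s] launch kernel with %zu output(s)\n", name_.c_str(), outputs_.size());
    // A real actor would forward the output device tensors to downstream actors here.
    mem_->FreeMemory(outputs_, ctx_);
  }

 private:
  std::string name_;
  DeviceContext *ctx_;
  MemoryManagerActor *mem_;
  DeviceTensor out_;
  std::vector<DeviceTensor *> outputs_;
};

int main() {
  DeviceContext ctx;
  MemoryManagerActor mem;
  ToyKernelActor actor("Conv2D_kernel_actor", &ctx, &mem);
  actor.Run();
  return 0;
}
```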