From 744355a0056feed0e3bdb81b1642c51e47f5834e Mon Sep 17 00:00:00 2001
From: Harshvardhan Gupta
Date: Tue, 13 Oct 2020 09:52:01 -0400
Subject: [PATCH] remove dbg args from runtime and remove needless argument
 from IsWatchpoint

---
 .../mem_reuse/mem_reuse_allocator.cc          |  5 +-
 .../ccsrc/backend/session/ascend_session.cc   |  2 +-
 .../ccsrc/backend/session/gpu_session.cc      |  4 -
 mindspore/ccsrc/debug/debug_services.cc       |  3 +-
 mindspore/ccsrc/debug/debug_services.h        |  2 +-
 mindspore/ccsrc/debug/debugger/debugger.cc    | 56 ++++++++++++--
 mindspore/ccsrc/debug/debugger/debugger.h     |  6 ++
 .../device/ascend/ascend_kernel_runtime.cc    | 56 ++------------
 .../device/ascend/ascend_kernel_runtime.h     |  4 +-
 .../runtime/device/cpu/cpu_kernel_runtime.cc  |  2 +-
 .../runtime/device/cpu/cpu_kernel_runtime.h   |  2 +-
 .../runtime/device/gpu/gpu_kernel_runtime.cc  | 76 +++++++------------
 .../runtime/device/gpu/gpu_kernel_runtime.h   | 11 ++-
 .../ccsrc/runtime/device/kernel_runtime.cc    |  2 +-
 .../ccsrc/runtime/device/kernel_runtime.h     | 15 +++-
 15 files changed, 115 insertions(+), 131 deletions(-)

diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc
index 24a9474f48..12c7317582 100644
--- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc
+++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_reuse_allocator.cc
@@ -20,8 +20,8 @@
 #include "runtime/device/ascend/ascend_stream_assign.h"
 #endif
 #ifdef ENABLE_DEBUGGER
-#include "debug/debugger/debugger.h"
 #include "debug/debug_services.h"
+#include "debug/debugger/debugger.h"
 #endif
 
 namespace mindspore {
@@ -82,9 +82,8 @@ bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr
   auto debugger_ = mindspore::Debugger::GetInstance();
   if (debugger_->DebuggerBackendEnabled()) {
     DebugServices *debug_services = debugger_->debug_services();
-    auto watchpoint_table = debug_services->GetWatchpointTable();
     std::string current_kernel_name = kernel_curr->scope_full_name();
-    if (debug_services->IsWatchPoint(current_kernel_name, watchpoint_table)) {
+    if (debug_services->IsWatchPoint(current_kernel_name)) {
       return false;
     }
   }
diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index f90db100e0..09bd1134c1 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -602,7 +602,7 @@ void AscendSession::LoadTensor(const std::shared_ptr &kernel_graph)
   tensor_loader->EmptyTensor();
   uint32_t iter_num = tensor_loader->GetIterNum();
   tensor_loader->set_iter_num(++iter_num);
-  (void)runtime_instance->LoadData(kernel_graph.get(), debugger_.get());
+  (void)runtime_instance->LoadData(kernel_graph.get());
   tensor_loader->EmptyPrevTensor();
 }
 #endif
diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc
index 9c62c1a5f6..afd86a2dc2 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -218,11 +218,7 @@ void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph,
 void GPUSession::Execute(const std::shared_ptr &kernel_graph) const {
   auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
   MS_EXCEPTION_IF_NULL(runtime_instance);
-#ifdef ENABLE_DEBUGGER
-  if (!runtime_instance->Run(kernel_graph.get(), false, debugger_.get())) {
-#else
   if (!runtime_instance->Run(kernel_graph.get(), false)) {
-#endif
     MS_LOG(EXCEPTION) << "GPU execute graph failed!";
   }
 }
diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc
index 04b9925c92..0a3899410d 100644
--- a/mindspore/ccsrc/debug/debug_services.cc
+++ b/mindspore/ccsrc/debug/debug_services.cc
@@ -234,8 +234,7 @@ void DebugServices::ReadNodesTensors(std::vector name, std::vector<
   }
 }
 
-bool DebugServices::IsWatchPoint(std::string kernel_name,
-                                 std::unordered_map watchpoint_table) {
+bool DebugServices::IsWatchPoint(std::string kernel_name) {
   bool ret = false;
   for (auto w_table_item : watchpoint_table) {
     auto check_node_list = std::get<1>(w_table_item).check_node_list;
diff --git a/mindspore/ccsrc/debug/debug_services.h b/mindspore/ccsrc/debug/debug_services.h
index 1dab625632..eda152b4d8 100644
--- a/mindspore/ccsrc/debug/debug_services.h
+++ b/mindspore/ccsrc/debug/debug_services.h
@@ -136,7 +136,7 @@ class DebugServices {
                         std::vector *data_ptr, std::vector *data_size, std::vector *dtype,
                         std::vector> *shape);
 
-  bool IsWatchPoint(std::string kernel_name, std::unordered_map watchpoint_table);
+  bool IsWatchPoint(std::string kernel_name);
 
   TensorLoader *tensor_loader() const;
 
diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc
index fb4d7b5bbf..f867b11ccb 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -49,7 +49,7 @@ namespace mindspore {
 DebuggerPtr Debugger::debugger_ = nullptr;
 std::mutex Debugger::instance_lock_;
 
-static const size_t PRAMATER_OUTPUT_INDEX = 0;
+static const size_t PARAMETER_OUTPUT_INDEX = 0;
 static const size_t VALUE_NODE_OUTPUT_INDEX = 0;
 
 Debugger::Debugger()
@@ -279,8 +279,7 @@ void Debugger::PostExecute() {
 
 bool Debugger::ReadNodeDataRequired() {
   if (debugger_enabled_ && !is_dataset_graph_) {
-    auto watchpoint_table = debug_services_->GetWatchpointTable();
-    auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
+    auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_);
     // if node has a watchpoint on it, is next_to node, or continue_to node then read the kernel tensor data
     if (is_watchpoint || (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_))) {
       return true;
@@ -296,8 +295,7 @@ void Debugger::PostExecuteNode() {
     return;
   }
   if (debugger_enabled_ && !is_dataset_graph_) {
-    auto watchpoint_table = debug_services_->GetWatchpointTable();
-    auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
+    auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_);
     // if kernel is watchpoint,and get hit. suspend.
     bool hit_empty_flag = true;
@@ -914,7 +912,7 @@ void Debugger::LoadParametersAndConst() {
   MS_LOG(INFO) << "Start to load Parameters!";
   const auto &parameters = graph_ptr_->inputs();
   for (auto &item : parameters) {
-    LoadSingleAnfnode(item, PRAMATER_OUTPUT_INDEX);
+    LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX);
   }
   // load value nodes
   // get all constant avlues from the graph
@@ -925,4 +923,50 @@ void Debugger::LoadParametersAndConst() {
   }
 }
 
+void Debugger::LoadGraphOutputs() {
+  if (!(debugger_enabled() && device_target_ == kAscendDevice)) return;
+  MS_EXCEPTION_IF_NULL(graph_ptr_);
+  const auto &apply_kernels = graph_ptr_->execution_order();
+  // for kernels, execution order starts from 1
+  int exec_order = 1;
+  for (const auto &node : apply_kernels) {
+    MS_EXCEPTION_IF_NULL(node);
+    auto node_name = AnfAlgo::GetCNodeName(node);
+    std::string kernel_name = node->fullname_with_scope();
+    auto output_size = AnfAlgo::GetOutputTensorNum(node);
+    if (partial_memory_) {
+      if (!debug_services_->IsWatchPoint(kernel_name)) {
+        continue;
+      }
+    }
+    for (size_t j = 0; j < output_size; ++j) {
+      auto addr = AnfAlgo::GetOutputAddr(node, j);
+      MS_EXCEPTION_IF_NULL(addr);
+      auto type = AnfAlgo::GetOutputInferDataType(node, j);
+      auto format = kOpFormat_DEFAULT;
+      string tensor_name = kernel_name + ':' + std::to_string(j);
+      ShapeVector int_shapes;
+      auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
+      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
+                           [](size_t inner_item) { return SizeToInt(inner_item); });
+      auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
+      if (!ret) {
+        MS_LOG(ERROR) << "LoadMemToHost:"
+                      << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
+      }
+    }
+    exec_order = exec_order + 1;
+  }
+}
+
+void Debugger::UpdateStepNum() {
+  if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
+    ++num_step_;
+}
+
+void Debugger::ClearCurrentData() {
+  if (device_target_ == kGPUDevice && (debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration()))
+    debug_services_->tensor_loader()->EmptyCurrentTensor();
+}
+
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h
index 7dc058b732..a84e5c73d6 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.h
+++ b/mindspore/ccsrc/debug/debugger/debugger.h
@@ -105,6 +105,12 @@ class Debugger : public std::enable_shared_from_this {
 
   void LoadParametersAndConst();
 
+  void UpdateStepNum();
+
+  void ClearCurrentData();
+
+  void LoadGraphOutputs();
+
  private:
   // private constructor for singleton
   Debugger();
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
index fc7e39f805..700165fa66 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
@@ -263,6 +263,7 @@ bool AscendKernelRuntime::Init() {
   if (!ret) {
     return ret;
   }
+  SetDebugger();
   mem_manager_ = std::make_shared();
   MS_EXCEPTION_IF_NULL(mem_manager_);
   mem_manager_->MallocDeviceMemory();
@@ -271,63 +272,16 @@ bool AscendKernelRuntime::Init() {
   return ret;
 }
 
-#ifdef ENABLE_DEBUGGER
-namespace {
-void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
-  MS_EXCEPTION_IF_NULL(graph);
-  // trans_flag: "true" means tensor values will be transfered to host format, otherwise not.
-  bool trans_flag = false;
-  const auto &apply_kernels = graph->execution_order();
-  // for kernels, execution order starts from 1
-  int exec_order = 1;
-  auto debugger_i = mindspore::Debugger::GetInstance();
-  DebugServices *debug_services = debugger_i->debug_services();
-  auto watchpoint_table = debug_services->GetWatchpointTable();
-  for (const auto &node : apply_kernels) {
-    MS_EXCEPTION_IF_NULL(node);
-    auto node_name = AnfAlgo::GetCNodeName(node);
-    std::string kernel_name = node->fullname_with_scope();
-    auto output_size = AnfAlgo::GetOutputTensorNum(node);
-    if (debugger_i->partial_memory()) {
-      if (!debug_services->IsWatchPoint(kernel_name, watchpoint_table)) {
-        continue;
-      }
-    }
-    for (size_t j = 0; j < output_size; ++j) {
-      auto addr = AnfAlgo::GetOutputAddr(node, j);
-      auto type = AnfAlgo::GetOutputInferDataType(node, j);
-      auto format = kOpFormat_DEFAULT;
-      string tensor_name = kernel_name + ':' + std::to_string(j);
-      auto ascend_addr = dynamic_cast(addr);
-      MS_EXCEPTION_IF_NULL(ascend_addr);
-      ShapeVector int_shapes;
-      auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
-      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
-                           [](size_t inner_item) { return SizeToInt(inner_item); });
-      auto ret = ascend_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
-      if (!ret) {
-        MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name
-                      << ", host_format:" << format << ".!";
-      }
-    }
-    exec_order = exec_order + 1;
-  }
-}
-
-}  // namespace
-#endif
-
-bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph, Debugger *debugger) {
+bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
 #ifdef ENABLE_DEBUGGER
-  debugger_ = debugger;
   MS_LOG(INFO) << "Start load step";
   uint32_t cur_iter = 0;
   MS_LOG(INFO) << "Cur iter is " << cur_iter;
   // load output
-  LoadOutput(graph, debugger);
+  debugger_->LoadGraphOutputs();
   // load parameters
-  if (debugger) debugger->LoadParametersAndConst();
+  debugger_->LoadParametersAndConst();
 #endif
   return true;
 }
@@ -550,7 +504,7 @@ void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
   }
 }
 
-bool AscendKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger) {
+bool AscendKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
   bool ret = false;
 #if defined(_WIN32) || defined(_WIN64)
   auto start_time = std::chrono::steady_clock::now();
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
index 42384490a4..cd1e0da263 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
@@ -38,14 +38,14 @@ class AscendKernelRuntime : public KernelRuntime {
   AscendKernelRuntime() = default;
   ~AscendKernelRuntime() override;
   bool Init() override;
-  bool LoadData(session::KernelGraph *graph, Debugger *debugger) override;
+  bool LoadData(session::KernelGraph *graph) override;
   bool GenTask(const session::KernelGraph *graph);
   bool GenDynamicKernel(const session::KernelGraph *graph) override;
   bool RunDynamicKernelAsync(const session::KernelGraph *graph) override;
   bool LoadTask(const session::KernelGraph *graph);
   bool RunTask(const session::KernelGraph *graph);
   bool Load(session::KernelGraph *graph, bool is_task_sink) override;
-  bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override;
+  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
   void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &inputs,
                                  const std::unordered_set &value_nodes,
                                  const std::vector &execution_order) override;
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
index e9ab2628e8..71e8d26482 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
@@ -324,7 +324,7 @@ void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutput
   resource_manager_.DecreaseSummaryRefCount(summary_outputs);
 }
 
-bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink, Debugger *debugger) {
+bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink) {
   MS_EXCEPTION_IF_NULL(kernel_graph);
   resource_manager_.IncreaseAddressRefCount(kernel_graph);
 
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
index 5f3058d6fa..928ca9430b 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h
@@ -36,7 +36,7 @@ class CPUKernelRuntime : public KernelRuntime {
   ~CPUKernelRuntime() override = default;
 
   bool Init() override { return true; }
-  bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override;
+  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
   void AssignKernelAddress(session::KernelGraph *kernel_graph);
   void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector &inputs,
                            VectorRef *outputs);
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
index 6c3fe80155..c66bb2d46f 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
@@ -73,6 +73,7 @@ bool GPUKernelRuntime::Init() {
     (*init_nccl_comm_funcptr)();
   }
   device_init_ = true;
+  SetDebugger();
   return ret;
 }
 
@@ -104,17 +105,15 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
   bool read_data = false;
   auto &dump_json_parser = DumpJsonParser::GetInstance();
   std::string kernel_name = kernel->fullname_with_scope();
-  if (debugger) {
-    debugger->SetCurNode(kernel_name);
-    if (dump_enabled) {
-      auto dump_mode = dump_json_parser.dump_mode();
-      // dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list
-      if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
-        read_data = true;
-      }
-    } else if (debugger->debugger_enabled()) {
-      read_data = debugger->ReadNodeDataRequired();
+  debugger->SetCurNode(kernel_name);
+  if (dump_enabled) {
+    auto dump_mode = dump_json_parser.dump_mode();
+    // dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list
+    if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
+      read_data = true;
     }
+  } else if (debugger->debugger_enabled()) {
+    read_data = debugger->ReadNodeDataRequired();
   }
   if (!read_data) {
     return;
@@ -169,25 +168,8 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
       }
     }
   }
-  debugger->PostExecuteNode();
 }
-
-void UpdateStepNum(Debugger *debugger, bool dump_enabled) {
-  if (debugger && (debugger->debugger_enabled() || dump_enabled)) {
-    auto cur_step_num = debugger->step_num();
-    cur_step_num = cur_step_num + 1;
-    debugger->SetStepNum(cur_step_num);
-  }
-}
-
-void ClearCurrentData(Debugger *debugger, bool dump_enabled) {
-  if (debugger && (debugger->debugger_enabled() || dump_enabled)) {
-    DebugServices *debug_services = debugger->debug_services();
-    TensorLoader *tensor_loader = debug_services->tensor_loader();
-    tensor_loader->EmptyCurrentTensor();
-  }
-}
 }  // namespace
 
 DeviceAddressPtr GPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
@@ -345,7 +327,7 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
   }
 }
 
-bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger) {
+bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
   struct timeval start_time, end_time;
   (void)gettimeofday(&start_time, nullptr);
   bool ret = true;
@@ -368,7 +350,7 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debug
     mem_reuse_util_ = mem_reuse_iter->second;
     MS_EXCEPTION_IF_NULL(mem_reuse_util_);
 
-    ret = RunOneStep(graph, debugger);
+    ret = RunOneStep(graph);
   } else {
     py::gil_scoped_release gil_release;
     ret = LaunchKernel(graph);
@@ -381,28 +363,28 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debug
   return ret;
 }
 
-bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph, Debugger *debugger) {
+bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) {
   bool ret = true;
   auto graph_id = graph->graph_id();
   if (!is_first_step_map_[graph_id]) {
     // Normally run graph
-    ret = LaunchKernelDynamic(graph, debugger);
+    ret = LaunchKernelDynamic(graph);
   } else {
     // Mock run first step
-    ret = LaunchKernelDynamic(graph, debugger, true, false);
+    ret = LaunchKernelDynamic(graph, true, false);
    if (ret) {
       // Normally run graph
-      ret = LaunchKernelDynamic(graph, debugger);
+      ret = LaunchKernelDynamic(graph);
     } else {
       // Trigger memory swap
-      ret = SearchMemSwapScheme(graph, debugger);
+      ret = SearchMemSwapScheme(graph);
     }
     is_first_step_map_[graph_id] = false;
   }
   return ret;
 }
 
-bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger) {
+bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) {
   MS_LOG(WARNING) << "Run out of memory and try memory swapping, it may take some time, please wait a moment.";
   bool ret = false;
   ClearKernelOldOutputAndWorkspace(graph);
@@ -416,7 +398,7 @@ bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph, De
       if (!mem_swap_manager_->RetreatSwapInfo()) {
         return false;
       }
-      ret = LaunchKernelDynamic(graph, debugger, true, false);
+      ret = LaunchKernelDynamic(graph, true, false);
       if (!ret) {
         ClearKernelOldOutputAndWorkspace(graph);
       }
@@ -424,14 +406,14 @@ bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph, De
   mem_swap_manager_->AssignHostMemory();
 
   // Time profiling
-  ret = LaunchKernelDynamic(graph, debugger, false, true);
+  ret = LaunchKernelDynamic(graph, false, true);
   if (!ret) {
     return ret;
   }
-  return RefineMemSwapScheme(graph, debugger);
+  return RefineMemSwapScheme(graph);
 }
 
-bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger) {
+bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph) {
   MS_LOG(WARNING) << "Refine memory swap scheme, it may take some time, please wait a moment.";
   auto &kernels = graph->execution_order();
   for (const auto &kernel : kernels) {
@@ -444,7 +426,7 @@ bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph, De
     bool ret = false;
     while (!ret) {
       mem_swap_manager_->AdjustSwapInPos(kernel, swap_in_task_idx);
-      ret = LaunchKernelDynamic(graph, debugger, true, false);
+      ret = LaunchKernelDynamic(graph, true, false);
       if (!ret) {
         ClearKernelOldOutputAndWorkspace(graph);
         ClearSwapInfo(true);
@@ -583,8 +565,7 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *g
   }
 }
 
-bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, Debugger *debugger, bool mock,
-                                           bool profiling) {
+bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_EXCEPTION_IF_NULL(mem_reuse_util_);
   // Reset the reference count.
@@ -593,10 +574,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
   AllocCommunicationOpDynamicRes(graph);
   AllocInplaceNodeMemory(graph);
 
-  debugger_ = debugger;
   bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration();
   if (!mock) {
-    UpdateStepNum(debugger, dump_enabled);
+    debugger_->UpdateStepNum();
   }
   auto &kernels = graph->execution_order();
   int exec_order = 1;
@@ -618,7 +598,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
       if (!ret) {
        if (!mock) {
           // invalidate current data collected by the debugger
-          ClearCurrentData(debugger, dump_enabled);
+          debugger_->ClearCurrentData();
         }
         return false;
       }
@@ -639,7 +619,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
         LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs);
       }
       // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost)
-      LoadKernelData(debugger, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
+      LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
                      dump_enabled);
     }
     exec_order = exec_order + 1;
@@ -647,14 +627,14 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
     if (!UpdateMemorySwapTask(kernel, mock, profiling)) {
       if (!mock) {
         // invalidate current data collected by the debugger
-        ClearCurrentData(debugger, dump_enabled);
+        debugger_->ClearCurrentData();
       }
       return false;
     }
   }
   if (!mock) {
     // collect weights and bias for dump mode
-    if (debugger) debugger->LoadParametersAndConst();
+    debugger_->LoadParametersAndConst();
     CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed.");
   }
   ClearSwapInfo(mock);
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
index 55c977cd31..0fcb5a0fe0 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.h
@@ -42,7 +42,7 @@ class GPUKernelRuntime : public KernelRuntime {
                                  const std::unordered_set &value_nodes,
                                  const std::vector &execution_order) override;
   void AssignMemory(session::KernelGraph *graph) override;
-  bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override;
+  bool Run(session::KernelGraph *graph, bool is_task_sink) override;
   bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
   bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
 
@@ -67,11 +67,10 @@ class GPUKernelRuntime : public KernelRuntime {
   void ClearKernelOutputAddress(const session::KernelGraph *graph);
   void ClearKernelWorkspaceAddress(const session::KernelGraph *graph);
   void ClearKernelOldOutputAndWorkspace(const session::KernelGraph *graph);
-  bool RunOneStep(const session::KernelGraph *graph, Debugger *debugger = nullptr);
-  bool SearchMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger = nullptr);
-  bool RefineMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger = nullptr);
-  bool LaunchKernelDynamic(const session::KernelGraph *graph, Debugger *debugger = nullptr, bool mock = false,
-                           bool profiling = false);
+  bool RunOneStep(const session::KernelGraph *graph);
+  bool SearchMemSwapScheme(const session::KernelGraph *graph);
+  bool RefineMemSwapScheme(const session::KernelGraph *graph);
+  bool LaunchKernelDynamic(const session::KernelGraph *graph, bool mock = false, bool profiling = false);
   void LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs,
                                      const AddressPtrList &workspace, const AddressPtrList &outputs);
   bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size, bool mock);
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
index 63ba6a5613..4dbda945b9 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
@@ -39,7 +39,7 @@ KernelRuntime::~KernelRuntime() {}
 
 bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
 
-bool KernelRuntime::LoadData(session::KernelGraph *graph, Debugger *debugger) { return false; }
+bool KernelRuntime::LoadData(session::KernelGraph *graph) { return false; }
 
 bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
   MS_EXCEPTION_IF_NULL(kernel);
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h
index bee9979865..f941abf7fc 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/kernel_runtime.h
@@ -56,9 +56,9 @@ class KernelRuntime {
   void RunOpClearMemory(const session::KernelGraph *graph);
   static bool DumpDataEnabled();
   static bool DumpDataEnabledIteration();
-  virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger);
+  virtual bool LoadData(session::KernelGraph *graph);
   virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
-  virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0;
+  virtual bool Run(session::KernelGraph *graph, bool is_task_sink) = 0;
   virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0;
   virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0;
   bool LaunchKernel(const session::KernelGraph *graph);
@@ -89,6 +89,13 @@ class KernelRuntime {
   uint32_t device_id() { return device_id_; }
   DeviceAddressPtr AssignSingleOpLaunchMemory(size_t size, const std::string &format, TypeId type);
 
+  // set debugger
+  void SetDebugger() {
+#if !defined(_WIN32) && !defined(_WIN64)
+    debugger_ = Debugger::GetInstance();
+#endif
+  }
+
  protected:
   virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
                                                TypeId type_id) = 0;
@@ -122,8 +129,8 @@ class KernelRuntime {
 
  protected:
   uint32_t device_id_{0};
-#ifdef ENABLE_DEBUGGER
-  Debugger *debugger_;
+#if !defined(_WIN32) && !defined(_WIN64)
+  std::shared_ptr debugger_;
 #endif
   void *stream_ = nullptr;
   std::shared_ptr mem_manager_{nullptr};
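
Editor's note, outside the patch: a minimal standalone C++ sketch of the pattern this commit applies. The class and function names below (MockDebugger, MockKernelRuntime) are illustrative stand-ins, not the MindSpore API; the point is that the runtime caches the Debugger singleton once via SetDebugger() (called in Init()), so Run()/LoadData() no longer need a Debugger * threaded through every call site, and IsWatchPoint() can rely on the internally held watchpoint table.

```cpp
// Mock illustration only -- stand-in classes, not MindSpore's real
// Debugger/KernelRuntime. Shows the singleton-caching pattern introduced
// by KernelRuntime::SetDebugger() in this patch.
#include <iostream>
#include <memory>

class MockDebugger {
 public:
  // Simplified stand-in for Debugger::GetInstance().
  static std::shared_ptr<MockDebugger> GetInstance() {
    static std::shared_ptr<MockDebugger> instance(new MockDebugger());
    return instance;
  }
  void LoadParametersAndConst() { std::cout << "load parameters and const\n"; }

 private:
  MockDebugger() = default;
};

class MockKernelRuntime {
 public:
  // Mirrors the new SetDebugger(): cache the singleton once (e.g. in Init())
  // instead of accepting a Debugger * argument on every Run()/LoadData() call.
  void SetDebugger() { debugger_ = MockDebugger::GetInstance(); }

  // Run() no longer takes a debugger parameter; it uses the cached pointer.
  bool Run(bool is_task_sink) {
    (void)is_task_sink;
    if (debugger_ != nullptr) {
      debugger_->LoadParametersAndConst();
    }
    return true;
  }

 private:
  std::shared_ptr<MockDebugger> debugger_;
};

int main() {
  MockKernelRuntime runtime;
  runtime.SetDebugger();  // done once, as the patch does in Init()
  return runtime.Run(false) ? 0 : 1;
}
```

The trade-off is an implicit dependency on the singleton instead of an explicit parameter, which is what allows the CPU/GPU/Ascend Run() signatures and DebugServices::IsWatchPoint() above to drop their debugger-related arguments.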