Remove debugger arguments from the runtime APIs and remove the needless watchpoint-table argument from IsWatchPoint

pull/7263/head
Harshvardhan Gupta 5 years ago
parent 687e9126aa
commit 744355a005

@ -20,8 +20,8 @@
#include "runtime/device/ascend/ascend_stream_assign.h" #include "runtime/device/ascend/ascend_stream_assign.h"
#endif #endif
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#include "debug/debug_services.h" #include "debug/debug_services.h"
#include "debug/debugger/debugger.h"
#endif #endif
namespace mindspore { namespace mindspore {
@ -82,9 +82,8 @@ bool BestFitMemReuse::IsUsable(const KernelDefPtr &kernel_curr, const MembufPtr
auto debugger_ = mindspore::Debugger::GetInstance(); auto debugger_ = mindspore::Debugger::GetInstance();
if (debugger_->DebuggerBackendEnabled()) { if (debugger_->DebuggerBackendEnabled()) {
DebugServices *debug_services = debugger_->debug_services(); DebugServices *debug_services = debugger_->debug_services();
auto watchpoint_table = debug_services->GetWatchpointTable();
std::string current_kernel_name = kernel_curr->scope_full_name(); std::string current_kernel_name = kernel_curr->scope_full_name();
if (debug_services->IsWatchPoint(current_kernel_name, watchpoint_table)) { if (debug_services->IsWatchPoint(current_kernel_name)) {
return false; return false;
} }
} }

@ -602,7 +602,7 @@ void AscendSession::LoadTensor(const std::shared_ptr<KernelGraph> &kernel_graph)
tensor_loader->EmptyTensor(); tensor_loader->EmptyTensor();
uint32_t iter_num = tensor_loader->GetIterNum(); uint32_t iter_num = tensor_loader->GetIterNum();
tensor_loader->set_iter_num(++iter_num); tensor_loader->set_iter_num(++iter_num);
(void)runtime_instance->LoadData(kernel_graph.get(), debugger_.get()); (void)runtime_instance->LoadData(kernel_graph.get());
tensor_loader->EmptyPrevTensor(); tensor_loader->EmptyPrevTensor();
} }
#endif #endif

@ -218,11 +218,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const { void GPUSession::Execute(const std::shared_ptr<KernelGraph> &kernel_graph) const {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_); auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
MS_EXCEPTION_IF_NULL(runtime_instance); MS_EXCEPTION_IF_NULL(runtime_instance);
#ifdef ENABLE_DEBUGGER
if (!runtime_instance->Run(kernel_graph.get(), false, debugger_.get())) {
#else
if (!runtime_instance->Run(kernel_graph.get(), false)) { if (!runtime_instance->Run(kernel_graph.get(), false)) {
#endif
MS_LOG(EXCEPTION) << "GPU execute graph failed!"; MS_LOG(EXCEPTION) << "GPU execute graph failed!";
} }
} }

@ -234,8 +234,7 @@ void DebugServices::ReadNodesTensors(std::vector<std::string> name, std::vector<
} }
} }
bool DebugServices::IsWatchPoint(std::string kernel_name, bool DebugServices::IsWatchPoint(std::string kernel_name) {
std::unordered_map<unsigned int, watchpoint_t> watchpoint_table) {
bool ret = false; bool ret = false;
for (auto w_table_item : watchpoint_table) { for (auto w_table_item : watchpoint_table) {
auto check_node_list = std::get<1>(w_table_item).check_node_list; auto check_node_list = std::get<1>(w_table_item).check_node_list;

@ -136,7 +136,7 @@ class DebugServices {
std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size, std::vector<char *> *data_ptr, std::vector<unsigned int> *data_size,
std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape); std::vector<TypePtr> *dtype, std::vector<std::vector<int>> *shape);
bool IsWatchPoint(std::string kernel_name, std::unordered_map<unsigned int, watchpoint_t> watchpoint_table); bool IsWatchPoint(std::string kernel_name);
TensorLoader *tensor_loader() const; TensorLoader *tensor_loader() const;

@ -49,7 +49,7 @@ namespace mindspore {
DebuggerPtr Debugger::debugger_ = nullptr; DebuggerPtr Debugger::debugger_ = nullptr;
std::mutex Debugger::instance_lock_; std::mutex Debugger::instance_lock_;
static const size_t PRAMATER_OUTPUT_INDEX = 0; static const size_t PARAMETER_OUTPUT_INDEX = 0;
static const size_t VALUE_NODE_OUTPUT_INDEX = 0; static const size_t VALUE_NODE_OUTPUT_INDEX = 0;
Debugger::Debugger() Debugger::Debugger()
@ -279,8 +279,7 @@ void Debugger::PostExecute() {
bool Debugger::ReadNodeDataRequired() { bool Debugger::ReadNodeDataRequired() {
if (debugger_enabled_ && !is_dataset_graph_) { if (debugger_enabled_ && !is_dataset_graph_) {
auto watchpoint_table = debug_services_->GetWatchpointTable(); auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_);
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
// if node has a watchpoint on it, is next_to node, or continue_to node then read the kernel tensor data // if node has a watchpoint on it, is next_to node, or continue_to node then read the kernel tensor data
if (is_watchpoint || (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_))) { if (is_watchpoint || (run_level_ == "node" && (node_name_ == "" || node_name_ == cur_name_))) {
return true; return true;
@ -296,8 +295,7 @@ void Debugger::PostExecuteNode() {
return; return;
} }
if (debugger_enabled_ && !is_dataset_graph_) { if (debugger_enabled_ && !is_dataset_graph_) {
auto watchpoint_table = debug_services_->GetWatchpointTable(); auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_);
auto is_watchpoint = debug_services_->IsWatchPoint(cur_name_, watchpoint_table);
// if the kernel is a watchpoint and gets hit, suspend. // if the kernel is a watchpoint and gets hit, suspend.
bool hit_empty_flag = true; bool hit_empty_flag = true;
@ -914,7 +912,7 @@ void Debugger::LoadParametersAndConst() {
MS_LOG(INFO) << "Start to load Parameters!"; MS_LOG(INFO) << "Start to load Parameters!";
const auto &parameters = graph_ptr_->inputs(); const auto &parameters = graph_ptr_->inputs();
for (auto &item : parameters) { for (auto &item : parameters) {
LoadSingleAnfnode(item, PRAMATER_OUTPUT_INDEX); LoadSingleAnfnode(item, PARAMETER_OUTPUT_INDEX);
} }
// load value nodes // load value nodes
// get all constant values from the graph // get all constant values from the graph
@ -925,4 +923,50 @@ void Debugger::LoadParametersAndConst() {
} }
} }
void Debugger::LoadGraphOutputs() {
if (!(debugger_enabled() && device_target_ == kAscendDevice)) return;
MS_EXCEPTION_IF_NULL(graph_ptr_);
const auto &apply_kernels = graph_ptr_->execution_order();
// for kernels, execution order starts from 1
int exec_order = 1;
for (const auto &node : apply_kernels) {
MS_EXCEPTION_IF_NULL(node);
auto node_name = AnfAlgo::GetCNodeName(node);
std::string kernel_name = node->fullname_with_scope();
auto output_size = AnfAlgo::GetOutputTensorNum(node);
if (partial_memory_) {
if (!debug_services_->IsWatchPoint(kernel_name)) {
continue;
}
}
for (size_t j = 0; j < output_size; ++j) {
auto addr = AnfAlgo::GetOutputAddr(node, j);
MS_EXCEPTION_IF_NULL(addr);
auto type = AnfAlgo::GetOutputInferDataType(node, j);
auto format = kOpFormat_DEFAULT;
string tensor_name = kernel_name + ':' + std::to_string(j);
ShapeVector int_shapes;
auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
[](size_t inner_item) { return SizeToInt(inner_item); });
auto ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
if (!ret) {
MS_LOG(ERROR) << "LoadMemToHost:"
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
}
}
exec_order = exec_order + 1;
}
}
// Advances the debugger step counter. Only meaningful on the GPU backend,
// and only while either the debugger or e2e dump iteration is active.
void Debugger::UpdateStepNum() {
  if (device_target_ != kGPUDevice) {
    return;
  }
  const bool collecting = debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration();
  if (collecting) {
    ++num_step_;
  }
}
// Discards the tensors collected for the current step. Only meaningful on
// the GPU backend, and only while the debugger or e2e dump is active.
void Debugger::ClearCurrentData() {
  if (device_target_ != kGPUDevice) {
    return;
  }
  const bool collecting = debugger_enabled_ || device::KernelRuntime::DumpDataEnabledIteration();
  if (collecting) {
    debug_services_->tensor_loader()->EmptyCurrentTensor();
  }
}
} // namespace mindspore } // namespace mindspore

@ -105,6 +105,12 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
void LoadParametersAndConst(); void LoadParametersAndConst();
void UpdateStepNum();
void ClearCurrentData();
void LoadGraphOutputs();
private: private:
// private constructor for singleton // private constructor for singleton
Debugger(); Debugger();

@ -263,6 +263,7 @@ bool AscendKernelRuntime::Init() {
if (!ret) { if (!ret) {
return ret; return ret;
} }
SetDebugger();
mem_manager_ = std::make_shared<AscendMemoryManager>(); mem_manager_ = std::make_shared<AscendMemoryManager>();
MS_EXCEPTION_IF_NULL(mem_manager_); MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->MallocDeviceMemory(); mem_manager_->MallocDeviceMemory();
@ -271,63 +272,16 @@ bool AscendKernelRuntime::Init() {
return ret; return ret;
} }
#ifdef ENABLE_DEBUGGER bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph) {
namespace {
// Loads each kernel output of `graph` to host memory for debugger inspection.
// NOTE(review): the `debugger` parameter is unused — the function fetches the
// singleton through mindspore::Debugger::GetInstance() instead.
void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  // trans_flag: "true" means tensor values will be transferred to host format, otherwise not.
  bool trans_flag = false;
  const auto &apply_kernels = graph->execution_order();
  // for kernels, execution order starts from 1
  int exec_order = 1;
  auto debugger_i = mindspore::Debugger::GetInstance();
  DebugServices *debug_services = debugger_i->debug_services();
  auto watchpoint_table = debug_services->GetWatchpointTable();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    auto node_name = AnfAlgo::GetCNodeName(node);  // NOTE(review): unused local
    std::string kernel_name = node->fullname_with_scope();
    auto output_size = AnfAlgo::GetOutputTensorNum(node);
    // In partial-memory mode only kernels that carry a watchpoint are loaded.
    if (debugger_i->partial_memory()) {
      if (!debug_services->IsWatchPoint(kernel_name, watchpoint_table)) {
        continue;
      }
    }
    for (size_t j = 0; j < output_size; ++j) {
      auto addr = AnfAlgo::GetOutputAddr(node, j);
      auto type = AnfAlgo::GetOutputInferDataType(node, j);
      auto format = kOpFormat_DEFAULT;
      // Tensor names are "<scope>:<output index>".
      string tensor_name = kernel_name + ':' + std::to_string(j);
      auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
      MS_EXCEPTION_IF_NULL(ascend_addr);
      ShapeVector int_shapes;
      auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
      auto ret = ascend_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
      if (!ret) {
        // Best-effort: a failed load is logged but does not abort the walk.
        MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name
                      << ", host_format:" << format << ".!";
      }
    }
    exec_order = exec_order + 1;
  }
}
} // namespace
#endif
bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph, Debugger *debugger) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
#ifdef ENABLE_DEBUGGER #ifdef ENABLE_DEBUGGER
debugger_ = debugger;
MS_LOG(INFO) << "Start load step"; MS_LOG(INFO) << "Start load step";
uint32_t cur_iter = 0; uint32_t cur_iter = 0;
MS_LOG(INFO) << "Cur iter is " << cur_iter; MS_LOG(INFO) << "Cur iter is " << cur_iter;
// load output // load output
LoadOutput(graph, debugger); debugger_->LoadGraphOutputs();
// load parameters // load parameters
if (debugger) debugger->LoadParametersAndConst(); debugger_->LoadParametersAndConst();
#endif #endif
return true; return true;
} }
@ -550,7 +504,7 @@ void AscendKernelRuntime::DebugTaskIdName(GraphId graph_id) {
} }
} }
bool AscendKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger) { bool AscendKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
bool ret = false; bool ret = false;
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
auto start_time = std::chrono::steady_clock::now(); auto start_time = std::chrono::steady_clock::now();

@ -38,14 +38,14 @@ class AscendKernelRuntime : public KernelRuntime {
AscendKernelRuntime() = default; AscendKernelRuntime() = default;
~AscendKernelRuntime() override; ~AscendKernelRuntime() override;
bool Init() override; bool Init() override;
bool LoadData(session::KernelGraph *graph, Debugger *debugger) override; bool LoadData(session::KernelGraph *graph) override;
bool GenTask(const session::KernelGraph *graph); bool GenTask(const session::KernelGraph *graph);
bool GenDynamicKernel(const session::KernelGraph *graph) override; bool GenDynamicKernel(const session::KernelGraph *graph) override;
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override; bool RunDynamicKernelAsync(const session::KernelGraph *graph) override;
bool LoadTask(const session::KernelGraph *graph); bool LoadTask(const session::KernelGraph *graph);
bool RunTask(const session::KernelGraph *graph); bool RunTask(const session::KernelGraph *graph);
bool Load(session::KernelGraph *graph, bool is_task_sink) override; bool Load(session::KernelGraph *graph, bool is_task_sink) override;
bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override; bool Run(session::KernelGraph *graph, bool is_task_sink) override;
void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs, void ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs,
const std::unordered_set<ValueNodePtr> &value_nodes, const std::unordered_set<ValueNodePtr> &value_nodes,
const std::vector<CNodePtr> &execution_order) override; const std::vector<CNodePtr> &execution_order) override;

@ -324,7 +324,7 @@ void CPUKernelRuntime::DecreaseSummaryRefCount(const session::NamedSummaryOutput
resource_manager_.DecreaseSummaryRefCount(summary_outputs); resource_manager_.DecreaseSummaryRefCount(summary_outputs);
} }
bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink, Debugger *debugger) { bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink) {
MS_EXCEPTION_IF_NULL(kernel_graph); MS_EXCEPTION_IF_NULL(kernel_graph);
resource_manager_.IncreaseAddressRefCount(kernel_graph); resource_manager_.IncreaseAddressRefCount(kernel_graph);

@ -36,7 +36,7 @@ class CPUKernelRuntime : public KernelRuntime {
~CPUKernelRuntime() override = default; ~CPUKernelRuntime() override = default;
bool Init() override { return true; } bool Init() override { return true; }
bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override; bool Run(session::KernelGraph *graph, bool is_task_sink) override;
void AssignKernelAddress(session::KernelGraph *kernel_graph); void AssignKernelAddress(session::KernelGraph *kernel_graph);
void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs, void CreateOutputTensors(session::KernelGraph *kernel_graph, const std::vector<tensor::TensorPtr> &inputs,
VectorRef *outputs); VectorRef *outputs);

@ -73,6 +73,7 @@ bool GPUKernelRuntime::Init() {
(*init_nccl_comm_funcptr)(); (*init_nccl_comm_funcptr)();
} }
device_init_ = true; device_init_ = true;
SetDebugger();
return ret; return ret;
} }
@ -104,17 +105,15 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
bool read_data = false; bool read_data = false;
auto &dump_json_parser = DumpJsonParser::GetInstance(); auto &dump_json_parser = DumpJsonParser::GetInstance();
std::string kernel_name = kernel->fullname_with_scope(); std::string kernel_name = kernel->fullname_with_scope();
if (debugger) { debugger->SetCurNode(kernel_name);
debugger->SetCurNode(kernel_name); if (dump_enabled) {
if (dump_enabled) { auto dump_mode = dump_json_parser.dump_mode();
auto dump_mode = dump_json_parser.dump_mode(); // dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list
// dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) { read_data = true;
read_data = true;
}
} else if (debugger->debugger_enabled()) {
read_data = debugger->ReadNodeDataRequired();
} }
} else if (debugger->debugger_enabled()) {
read_data = debugger->ReadNodeDataRequired();
} }
if (!read_data) { if (!read_data) {
return; return;
@ -169,25 +168,8 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
} }
} }
} }
debugger->PostExecuteNode(); debugger->PostExecuteNode();
} }
void UpdateStepNum(Debugger *debugger, bool dump_enabled) {
if (debugger && (debugger->debugger_enabled() || dump_enabled)) {
auto cur_step_num = debugger->step_num();
cur_step_num = cur_step_num + 1;
debugger->SetStepNum(cur_step_num);
}
}
void ClearCurrentData(Debugger *debugger, bool dump_enabled) {
if (debugger && (debugger->debugger_enabled() || dump_enabled)) {
DebugServices *debug_services = debugger->debug_services();
TensorLoader *tensor_loader = debug_services->tensor_loader();
tensor_loader->EmptyCurrentTensor();
}
}
} // namespace } // namespace
DeviceAddressPtr GPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, DeviceAddressPtr GPUKernelRuntime::CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
@ -345,7 +327,7 @@ void GPUKernelRuntime::AssignMemory(session::KernelGraph *graph) {
} }
} }
bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger) { bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink) {
struct timeval start_time, end_time; struct timeval start_time, end_time;
(void)gettimeofday(&start_time, nullptr); (void)gettimeofday(&start_time, nullptr);
bool ret = true; bool ret = true;
@ -368,7 +350,7 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debug
mem_reuse_util_ = mem_reuse_iter->second; mem_reuse_util_ = mem_reuse_iter->second;
MS_EXCEPTION_IF_NULL(mem_reuse_util_); MS_EXCEPTION_IF_NULL(mem_reuse_util_);
ret = RunOneStep(graph, debugger); ret = RunOneStep(graph);
} else { } else {
py::gil_scoped_release gil_release; py::gil_scoped_release gil_release;
ret = LaunchKernel(graph); ret = LaunchKernel(graph);
@ -381,28 +363,28 @@ bool GPUKernelRuntime::Run(session::KernelGraph *graph, bool is_task_sink, Debug
return ret; return ret;
} }
bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph, Debugger *debugger) { bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) {
bool ret = true; bool ret = true;
auto graph_id = graph->graph_id(); auto graph_id = graph->graph_id();
if (!is_first_step_map_[graph_id]) { if (!is_first_step_map_[graph_id]) {
// Normally run graph // Normally run graph
ret = LaunchKernelDynamic(graph, debugger); ret = LaunchKernelDynamic(graph);
} else { } else {
// Mock run first step // Mock run first step
ret = LaunchKernelDynamic(graph, debugger, true, false); ret = LaunchKernelDynamic(graph, true, false);
if (ret) { if (ret) {
// Normally run graph // Normally run graph
ret = LaunchKernelDynamic(graph, debugger); ret = LaunchKernelDynamic(graph);
} else { } else {
// Trigger memory swap // Trigger memory swap
ret = SearchMemSwapScheme(graph, debugger); ret = SearchMemSwapScheme(graph);
} }
is_first_step_map_[graph_id] = false; is_first_step_map_[graph_id] = false;
} }
return ret; return ret;
} }
bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger) { bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) {
MS_LOG(WARNING) << "Run out of memory and try memory swapping, it may take some time, please wait a moment."; MS_LOG(WARNING) << "Run out of memory and try memory swapping, it may take some time, please wait a moment.";
bool ret = false; bool ret = false;
ClearKernelOldOutputAndWorkspace(graph); ClearKernelOldOutputAndWorkspace(graph);
@ -416,7 +398,7 @@ bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph, De
if (!mem_swap_manager_->RetreatSwapInfo()) { if (!mem_swap_manager_->RetreatSwapInfo()) {
return false; return false;
} }
ret = LaunchKernelDynamic(graph, debugger, true, false); ret = LaunchKernelDynamic(graph, true, false);
if (!ret) { if (!ret) {
ClearKernelOldOutputAndWorkspace(graph); ClearKernelOldOutputAndWorkspace(graph);
} }
@ -424,14 +406,14 @@ bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph, De
mem_swap_manager_->AssignHostMemory(); mem_swap_manager_->AssignHostMemory();
// Time profiling // Time profiling
ret = LaunchKernelDynamic(graph, debugger, false, true); ret = LaunchKernelDynamic(graph, false, true);
if (!ret) { if (!ret) {
return ret; return ret;
} }
return RefineMemSwapScheme(graph, debugger); return RefineMemSwapScheme(graph);
} }
bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger) { bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph) {
MS_LOG(WARNING) << "Refine memory swap scheme, it may take some time, please wait a moment."; MS_LOG(WARNING) << "Refine memory swap scheme, it may take some time, please wait a moment.";
auto &kernels = graph->execution_order(); auto &kernels = graph->execution_order();
for (const auto &kernel : kernels) { for (const auto &kernel : kernels) {
@ -444,7 +426,7 @@ bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph, De
bool ret = false; bool ret = false;
while (!ret) { while (!ret) {
mem_swap_manager_->AdjustSwapInPos(kernel, swap_in_task_idx); mem_swap_manager_->AdjustSwapInPos(kernel, swap_in_task_idx);
ret = LaunchKernelDynamic(graph, debugger, true, false); ret = LaunchKernelDynamic(graph, true, false);
if (!ret) { if (!ret) {
ClearKernelOldOutputAndWorkspace(graph); ClearKernelOldOutputAndWorkspace(graph);
ClearSwapInfo(true); ClearSwapInfo(true);
@ -583,8 +565,7 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *g
} }
} }
bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, Debugger *debugger, bool mock, bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bool mock, bool profiling) {
bool profiling) {
MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(mem_reuse_util_); MS_EXCEPTION_IF_NULL(mem_reuse_util_);
// Reset the reference count. // Reset the reference count.
@ -593,10 +574,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
AllocCommunicationOpDynamicRes(graph); AllocCommunicationOpDynamicRes(graph);
AllocInplaceNodeMemory(graph); AllocInplaceNodeMemory(graph);
debugger_ = debugger;
bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration(); bool dump_enabled = GPUKernelRuntime::DumpDataEnabledIteration();
if (!mock) { if (!mock) {
UpdateStepNum(debugger, dump_enabled); debugger_->UpdateStepNum();
} }
auto &kernels = graph->execution_order(); auto &kernels = graph->execution_order();
int exec_order = 1; int exec_order = 1;
@ -618,7 +598,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
if (!ret) { if (!ret) {
if (!mock) { if (!mock) {
// invalidate current data collected by the debugger // invalidate current data collected by the debugger
ClearCurrentData(debugger, dump_enabled); debugger_->ClearCurrentData();
} }
return false; return false;
} }
@ -639,7 +619,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs); LaunchKernelWithTimeProfiling(kernel, kernel_inputs, kernel_workspaces, kernel_outputs);
} }
// called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost) // called once per kernel to collect the outputs to the kernel (does a SyncDeviceToHost)
LoadKernelData(debugger, kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_, LoadKernelData(debugger_.get(), kernel, kernel_inputs, kernel_workspaces, kernel_outputs, exec_order, stream_,
dump_enabled); dump_enabled);
} }
exec_order = exec_order + 1; exec_order = exec_order + 1;
@ -647,14 +627,14 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
if (!UpdateMemorySwapTask(kernel, mock, profiling)) { if (!UpdateMemorySwapTask(kernel, mock, profiling)) {
if (!mock) { if (!mock) {
// invalidate current data collected by the debugger // invalidate current data collected by the debugger
ClearCurrentData(debugger, dump_enabled); debugger_->ClearCurrentData();
} }
return false; return false;
} }
} }
if (!mock) { if (!mock) {
// collect weights and bias for dump mode // collect weights and bias for dump mode
if (debugger) debugger->LoadParametersAndConst(); debugger_->LoadParametersAndConst();
CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed."); CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed.");
} }
ClearSwapInfo(mock); ClearSwapInfo(mock);

@ -42,7 +42,7 @@ class GPUKernelRuntime : public KernelRuntime {
const std::unordered_set<ValueNodePtr> &value_nodes, const std::unordered_set<ValueNodePtr> &value_nodes,
const std::vector<CNodePtr> &execution_order) override; const std::vector<CNodePtr> &execution_order) override;
void AssignMemory(session::KernelGraph *graph) override; void AssignMemory(session::KernelGraph *graph) override;
bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) override; bool Run(session::KernelGraph *graph, bool is_task_sink) override;
bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; } bool GenDynamicKernel(const session::KernelGraph *graph) override { return true; }
bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; } bool RunDynamicKernelAsync(const session::KernelGraph *graph) override { return true; }
@ -67,11 +67,10 @@ class GPUKernelRuntime : public KernelRuntime {
void ClearKernelOutputAddress(const session::KernelGraph *graph); void ClearKernelOutputAddress(const session::KernelGraph *graph);
void ClearKernelWorkspaceAddress(const session::KernelGraph *graph); void ClearKernelWorkspaceAddress(const session::KernelGraph *graph);
void ClearKernelOldOutputAndWorkspace(const session::KernelGraph *graph); void ClearKernelOldOutputAndWorkspace(const session::KernelGraph *graph);
bool RunOneStep(const session::KernelGraph *graph, Debugger *debugger = nullptr); bool RunOneStep(const session::KernelGraph *graph);
bool SearchMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger = nullptr); bool SearchMemSwapScheme(const session::KernelGraph *graph);
bool RefineMemSwapScheme(const session::KernelGraph *graph, Debugger *debugger = nullptr); bool RefineMemSwapScheme(const session::KernelGraph *graph);
bool LaunchKernelDynamic(const session::KernelGraph *graph, Debugger *debugger = nullptr, bool mock = false, bool LaunchKernelDynamic(const session::KernelGraph *graph, bool mock = false, bool profiling = false);
bool profiling = false);
void LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs, void LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs,
const AddressPtrList &workspace, const AddressPtrList &outputs); const AddressPtrList &workspace, const AddressPtrList &outputs);
bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size, bool mock); bool AttemptMallocMem(const DeviceAddressPtr &device_address, size_t size, bool mock);

@ -39,7 +39,7 @@ KernelRuntime::~KernelRuntime() {}
bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; } bool KernelRuntime::Load(session::KernelGraph *graph, bool is_task_sink) { return true; }
bool KernelRuntime::LoadData(session::KernelGraph *graph, Debugger *debugger) { return false; } bool KernelRuntime::LoadData(session::KernelGraph *graph) { return false; }
bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) { bool KernelRuntime::NodeOutputDeviceAddressExist(const AnfNodePtr &kernel, size_t index) {
MS_EXCEPTION_IF_NULL(kernel); MS_EXCEPTION_IF_NULL(kernel);

@ -56,9 +56,9 @@ class KernelRuntime {
void RunOpClearMemory(const session::KernelGraph *graph); void RunOpClearMemory(const session::KernelGraph *graph);
static bool DumpDataEnabled(); static bool DumpDataEnabled();
static bool DumpDataEnabledIteration(); static bool DumpDataEnabledIteration();
virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger); virtual bool LoadData(session::KernelGraph *graph);
virtual bool Load(session::KernelGraph *graph, bool is_task_sink); virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0; virtual bool Run(session::KernelGraph *graph, bool is_task_sink) = 0;
virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0; virtual bool GenDynamicKernel(const session::KernelGraph *graph) = 0;
virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0; virtual bool RunDynamicKernelAsync(const session::KernelGraph *graph) = 0;
bool LaunchKernel(const session::KernelGraph *graph); bool LaunchKernel(const session::KernelGraph *graph);
@ -89,6 +89,13 @@ class KernelRuntime {
uint32_t device_id() { return device_id_; } uint32_t device_id() { return device_id_; }
DeviceAddressPtr AssignSingleOpLaunchMemory(size_t size, const std::string &format, TypeId type); DeviceAddressPtr AssignSingleOpLaunchMemory(size_t size, const std::string &format, TypeId type);
// Caches the singleton Debugger instance in debugger_ so the runtime can
// drive debug hooks without the debugger being threaded through arguments.
// Compiled out on Windows, where the debugger is not supported.
void SetDebugger() {
#if !defined(_WIN32) && !defined(_WIN64)
  debugger_ = Debugger::GetInstance();
#endif
}
protected: protected:
virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format, virtual DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
TypeId type_id) = 0; TypeId type_id) = 0;
@ -122,8 +129,8 @@ class KernelRuntime {
protected: protected:
uint32_t device_id_{0}; uint32_t device_id_{0};
#ifdef ENABLE_DEBUGGER #if !defined(_WIN32) && !defined(_WIN64)
Debugger *debugger_; std::shared_ptr<Debugger> debugger_;
#endif #endif
void *stream_ = nullptr; void *stream_ = nullptr;
std::shared_ptr<MemoryManager> mem_manager_{nullptr}; std::shared_ptr<MemoryManager> mem_manager_{nullptr};

Loading…
Cancel
Save