From e4ebdc599e3fa362b1b6b2911db1d1d4455b5a7f Mon Sep 17 00:00:00 2001 From: Adel Shafiei Date: Tue, 13 Oct 2020 14:41:54 -0400 Subject: [PATCH] Added input/output kernel dump support based on the config file --- .../ccsrc/debug/data_dump/dump_json_parser.cc | 5 -- .../runtime/device/gpu/gpu_kernel_runtime.cc | 86 ++++++++++--------- 2 files changed, 45 insertions(+), 46 deletions(-) diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc index adfedb4789..bc5463f899 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc +++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc @@ -307,11 +307,6 @@ void DumpJsonParser::JudgeDumpEnabled() { MS_EXCEPTION_IF_NULL(context); if (context->get_param(MS_CTX_DEVICE_TARGET) == kGPUDevice) { async_dump_enabled_ = false; - // GPU not support dump kernel inputs - if (input_output_ != kDumpOutputOnly) { - MS_LOG(WARNING) << "Data dump only support dump kernel output when device target is GPU"; - input_output_ = kDumpOutputOnly; - } } if (context->get_param(MS_CTX_DEVICE_TARGET) == kAscendDevice) { diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index 08a9fc9f9e..f87043b15c 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -102,11 +102,11 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, bool dump_enabled) { // check if we should read the kernel data bool read_data = false; + auto &dump_json_parser = DumpJsonParser::GetInstance(); std::string kernel_name = kernel->fullname_with_scope(); if (debugger) { debugger->SetCurNode(kernel_name); if (dump_enabled) { - auto &dump_json_parser = DumpJsonParser::GetInstance(); auto dump_mode = dump_json_parser.dump_mode(); // dump the node if dump_mode is 0, which means all kernels, or if this kernel is in the kernels list if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) { @@ -120,49 +120,53 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, return; } - // get inputs - auto input_size = AnfAlgo::GetInputTensorNum(kernel); - for (size_t j = 0; j < input_size; ++j) { - auto input_kernel = kernel->input(j + 1); - std::string input_kernel_name = input_kernel->fullname_with_scope(); - auto addr = kernel_inputs[j]; - auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX); - auto format = kOpFormat_DEFAULT; - auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); - string input_tensor_name = input_kernel_name + ':' + "0"; - ShapeVector int_shapes; - auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX); - (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), - [](size_t inner_item) { return SizeToInt(inner_item); }); - auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true); - if (!ret) { - MS_LOG(ERROR) << "LoadMemToHost:" - << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!"; + if (dump_json_parser.InputNeedDump()) { + // get inputs + auto input_size = AnfAlgo::GetInputTensorNum(kernel); + for (size_t j = 0; j < input_size; ++j) { + auto input_kernel = kernel->input(j + 1); + std::string input_kernel_name = input_kernel->fullname_with_scope(); + auto addr = kernel_inputs[j]; + auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX); + auto format = kOpFormat_DEFAULT; + auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); + string input_tensor_name = input_kernel_name + ':' + "0"; + ShapeVector int_shapes; + auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX); + (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), + [](size_t inner_item) { return SizeToInt(inner_item); }); + auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true); + if (!ret) { + MS_LOG(ERROR) << "LoadMemToHost:" + << ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!"; + } } } - // get outputs - auto output_size = AnfAlgo::GetOutputTensorNum(kernel); - auto node_name = AnfAlgo::GetCNodeName(kernel); - - std::vector real_outputs; - real_outputs = CheckRealOutput(node_name, output_size); - - for (std::vector::iterator it = real_outputs.begin(); it != real_outputs.end(); ++it) { - auto j = *it; - auto addr = kernel_outputs[j]; - auto type = AnfAlgo::GetOutputInferDataType(kernel, j); - auto format = kOpFormat_DEFAULT; - auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); - string tensor_name = kernel_name + ':' + std::to_string(j); - ShapeVector int_shapes; - auto shape = AnfAlgo::GetOutputDeviceShape(kernel, j); - (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), - [](size_t inner_item) { return SizeToInt(inner_item); }); - auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false); - if (!ret) { - MS_LOG(ERROR) << "LoadMemToHost:" - << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; + if (dump_json_parser.OutputNeedDump()) { + // get outputs + auto output_size = AnfAlgo::GetOutputTensorNum(kernel); + auto node_name = AnfAlgo::GetCNodeName(kernel); + + std::vector real_outputs; + real_outputs = CheckRealOutput(node_name, output_size); + + for (std::vector::iterator it = real_outputs.begin(); it != real_outputs.end(); ++it) { + auto j = *it; + auto addr = kernel_outputs[j]; + auto type = AnfAlgo::GetOutputInferDataType(kernel, j); + auto format = kOpFormat_DEFAULT; + auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); + string tensor_name = kernel_name + ':' + std::to_string(j); + ShapeVector int_shapes; + auto shape = AnfAlgo::GetOutputDeviceShape(kernel, j); + (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes), + [](size_t inner_item) { return SizeToInt(inner_item); }); + auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false); + if (!ret) { + MS_LOG(ERROR) << "LoadMemToHost:" + << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!"; + } } }