parent
daf6739b22
commit
372c2e7951
@ -1,17 +1,19 @@
|
||||
{
|
||||
"DumpSettings": {
|
||||
"common_dump_settings": {
|
||||
"dump_mode": 0,
|
||||
"path": "/test",
|
||||
"net_name": "ResNet50",
|
||||
"dump_mode": 1,
|
||||
"op_debug_mode": 3,
|
||||
"iteration": 0,
|
||||
"kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
|
||||
"input_output": 0,
|
||||
"kernels": ["Default/Conv-op12"],
|
||||
"support_device": [0,1,2,3,4,5,6,7]
|
||||
},
|
||||
|
||||
"DumpSettingsSpec": {
|
||||
"net_name": "net name eg:ResNet50",
|
||||
"dump_mode": "0: dump all kernels, 1: dump kernels in kernels list",
|
||||
"op_debug_mode": "0: close debug, 1: debug ai-core overflow, 2: debug atomic overflow, 3: debug all overflow",
|
||||
"iteration": "specified iteration ",
|
||||
"kernels": "op's full scope name which need to be dump"
|
||||
"e2e_dump_settings": {
|
||||
"enable": false,
|
||||
"trans_flag": false
|
||||
},
|
||||
"async_dump_settings": {
|
||||
"enable": false,
|
||||
"op_debug_mode": 0
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,96 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "utils/ms_utils.h"
|
||||
#include "backend/session/kernel_graph.h"
|
||||
namespace mindspore {
|
||||
class DumpJsonParser {
|
||||
public:
|
||||
static DumpJsonParser &GetInstance() {
|
||||
static DumpJsonParser instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
void Parse();
|
||||
static bool DumpToFile(const std::string &filename, const void *data, size_t len);
|
||||
bool NeedDump(const std::string &op_full_name) const;
|
||||
void MatchKernel(const std::string &kernel_name);
|
||||
void PrintUnusedKernel();
|
||||
|
||||
bool async_dump_enabled() const { return async_dump_enabled_; }
|
||||
bool e2e_dump_enabled() const { return e2e_dump_enabled_; }
|
||||
uint32_t dump_mode() const { return dump_mode_; }
|
||||
std::string path() const { return path_; }
|
||||
std::string net_name() const { return net_name_; }
|
||||
uint32_t iteration() const { return iteration_; }
|
||||
uint32_t input_output() const { return input_output_; }
|
||||
uint32_t op_debug_mode() const { return op_debug_mode_; }
|
||||
bool trans_flag() const { return trans_flag_; }
|
||||
uint32_t cur_dump_iter() { return cur_dump_iter_; }
|
||||
void UpdateDumpIter() { ++cur_dump_iter_; }
|
||||
bool InputNeedDump() const;
|
||||
bool OutputNeedDump() const;
|
||||
std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const;
|
||||
void UpdateNeedDumpKernels(NotNull<const session::KernelGraph *> kernel_graph);
|
||||
|
||||
private:
|
||||
DumpJsonParser() = default;
|
||||
~DumpJsonParser() = default;
|
||||
DISABLE_COPY_AND_ASSIGN(DumpJsonParser)
|
||||
|
||||
std::mutex lock_;
|
||||
bool async_dump_enabled_{false};
|
||||
bool e2e_dump_enabled_{false};
|
||||
uint32_t dump_mode_{0};
|
||||
std::string path_;
|
||||
std::string net_name_;
|
||||
uint32_t iteration_{0};
|
||||
uint32_t input_output_{0};
|
||||
std::map<std::string, uint32_t> kernels_;
|
||||
std::set<uint32_t> support_devices_;
|
||||
uint32_t op_debug_mode_{0};
|
||||
bool trans_flag_{false};
|
||||
uint32_t cur_dump_iter_{0};
|
||||
|
||||
void ParseCommonDumpSetting(const nlohmann::json &content);
|
||||
void ParseAsyncDumpSetting(const nlohmann::json &content);
|
||||
void ParseE2eDumpSetting(const nlohmann::json &content);
|
||||
bool IsDumpEnabled();
|
||||
|
||||
auto CheckJsonKeyExist(const nlohmann::json &content, const std::string &key);
|
||||
|
||||
void ParseDumpMode(const nlohmann::json &content);
|
||||
void ParseDumpPath(const nlohmann::json &content);
|
||||
void ParseNetName(const nlohmann::json &content);
|
||||
void ParseIteration(const nlohmann::json &content);
|
||||
void ParseInputOutput(const nlohmann::json &content);
|
||||
void ParseKernels(const nlohmann::json &content);
|
||||
void ParseSupportDevice(const nlohmann::json &content);
|
||||
bool ParseEnable(const nlohmann::json &content);
|
||||
void ParseOpDebugMode(const nlohmann::json &content);
|
||||
|
||||
void JudgeDumpEnabled();
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DUMP_JSON_PARSER_H_
|
@ -0,0 +1,222 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "debug/data_dump/e2e_dump_util.h"
|
||||
#include <algorithm>
|
||||
#include "debug/data_dump/dump_json_parser.h"
|
||||
#include "common/trans.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "utils/ms_context.h"
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
#include "debug/debug_services.h"
|
||||
#include "debug/tensor_load.h"
|
||||
#include "debug/debugger/debugger.h"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
const size_t PRAMATER_OUTPUT_INDEX = 0;
|
||||
}
|
||||
|
||||
namespace mindspore {
|
||||
void E2eDumpUtil::GetFileKernelName(NotNull<std::string *> kernel_name) {
|
||||
const std::string strsrc = "/";
|
||||
const std::string strdst = "--";
|
||||
std::string::size_type pos = 0;
|
||||
std::string::size_type srclen = strsrc.size();
|
||||
std::string::size_type dstlen = strdst.size();
|
||||
while ((pos = kernel_name->find(strsrc, pos)) != std::string::npos) {
|
||||
kernel_name->replace(pos, srclen, strdst);
|
||||
pos += dstlen;
|
||||
}
|
||||
}
|
||||
|
||||
bool E2eDumpUtil::IsDeviceTargetGPU() {
|
||||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
|
||||
}
|
||||
|
||||
void E2eDumpUtil::DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr,
|
||||
bool trans_flag, const ShapeVector &int_shapes, const TypeId &type) {
|
||||
auto format = kOpFormat_DEFAULT;
|
||||
auto ret = addr->DumpMemToFile(trans_flag, file_path, format, int_shapes, type);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "DumpMemToFile Failed: flag:" << trans_flag << ", path:" << file_path << ", host_format:" << format
|
||||
<< ".!";
|
||||
}
|
||||
}
|
||||
|
||||
void E2eDumpUtil::DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
|
||||
NotNull<const device::DeviceAddress *> addr, bool trans_flag,
|
||||
const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger) {
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
auto format = kOpFormat_DEFAULT;
|
||||
DebugServices *debug_services = debugger->debug_services();
|
||||
TensorLoader *tensor_loader = debug_services->tensor_loader();
|
||||
auto ret = tensor_loader->DumpTensorToFile(original_kernel_name, trans_flag, file_path, format, int_shapes, type,
|
||||
addr->type_id(), addr->format(), slot);
|
||||
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "DumpTensorToFile Failed: flag:" << std::to_string(trans_flag) << ", path:" << file_path
|
||||
<< ", host_format:" << format;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void E2eDumpUtil::GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag,
|
||||
NotNull<ShapeVector *> int_shapes) {
|
||||
if (trans_flag) {
|
||||
*int_shapes = trans::GetRuntimePaddingShape(node, index);
|
||||
} else {
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(node, index);
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(*int_shapes),
|
||||
[](size_t inner_item) { return SizeToInt(inner_item); });
|
||||
}
|
||||
}
|
||||
|
||||
void E2eDumpUtil::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
auto &dump_json_parser = DumpJsonParser::GetInstance();
|
||||
if (!dump_json_parser.OutputNeedDump()) {
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Start e2e dump output";
|
||||
bool trans_flag = dump_json_parser.trans_flag();
|
||||
const auto &apply_kernels = graph->execution_order();
|
||||
for (const auto &node : apply_kernels) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto node_name = AnfAlgo::GetCNodeName(node);
|
||||
std::string kernel_name = node->fullname_with_scope();
|
||||
if (!dump_json_parser.NeedDump(kernel_name)) {
|
||||
continue;
|
||||
}
|
||||
DumpJsonParser::GetInstance().MatchKernel(kernel_name);
|
||||
GetFileKernelName(NOT_NULL(&kernel_name));
|
||||
auto output_size = AnfAlgo::GetOutputTensorNum(node);
|
||||
for (size_t j = 0; j < output_size; ++j) {
|
||||
auto addr = AnfAlgo::GetOutputAddr(node, j);
|
||||
ShapeVector int_shapes;
|
||||
GetDumpIntShape(node, j, trans_flag, NOT_NULL(&int_shapes));
|
||||
auto type = AnfAlgo::GetOutputInferDataType(node, j);
|
||||
std::string file_path = dump_path + '/' + kernel_name + '_' + "output_" + std::to_string(j);
|
||||
if (IsDeviceTargetGPU()) {
|
||||
DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), trans_flag, int_shapes, type, j,
|
||||
debugger);
|
||||
} else {
|
||||
DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void E2eDumpUtil::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
auto &dump_json_parser = DumpJsonParser::GetInstance();
|
||||
if (!dump_json_parser.InputNeedDump()) {
|
||||
return;
|
||||
}
|
||||
MS_LOG(INFO) << "Start e2e dump input";
|
||||
bool trans_flag = dump_json_parser.trans_flag();
|
||||
const auto &apply_kernels = graph->execution_order();
|
||||
for (const auto &node : apply_kernels) {
|
||||
MS_EXCEPTION_IF_NULL(node);
|
||||
auto node_name = AnfAlgo::GetCNodeName(node);
|
||||
std::string kernel_name = node->fullname_with_scope();
|
||||
if (!dump_json_parser.NeedDump(kernel_name)) {
|
||||
continue;
|
||||
}
|
||||
DumpJsonParser::GetInstance().MatchKernel(kernel_name);
|
||||
GetFileKernelName(NOT_NULL(&kernel_name));
|
||||
auto input_size = AnfAlgo::GetInputTensorNum(node);
|
||||
for (size_t j = 0; j < input_size; ++j) {
|
||||
auto kernel_with_index = AnfAlgo::GetPrevNodeOutput(node, j);
|
||||
auto input = kernel_with_index.first;
|
||||
auto index = kernel_with_index.second;
|
||||
auto addr = AnfAlgo::GetOutputAddr(input, index);
|
||||
|
||||
ShapeVector int_shapes;
|
||||
GetDumpIntShape(input, index, trans_flag, NOT_NULL(&int_shapes));
|
||||
auto type = AnfAlgo::GetOutputInferDataType(input, index);
|
||||
std::string file_path = dump_path + '/' + kernel_name + '_' + "input_" + std::to_string(j);
|
||||
if (IsDeviceTargetGPU()) {
|
||||
DumpGPUMemToFile(file_path, node->fullname_with_scope(), NOT_NULL(addr), trans_flag, int_shapes, type, j,
|
||||
debugger);
|
||||
} else {
|
||||
DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void E2eDumpUtil::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
auto &dump_json_parser = DumpJsonParser::GetInstance();
|
||||
MS_LOG(INFO) << "Start e2e dump parameters";
|
||||
bool trans_flag = dump_json_parser.trans_flag();
|
||||
const auto ¶meters = graph->inputs();
|
||||
for (auto &item : parameters) {
|
||||
if (!item->isa<Parameter>()) {
|
||||
continue;
|
||||
}
|
||||
std::string parameter_name = item->fullname_with_scope();
|
||||
if (!dump_json_parser.NeedDump(parameter_name)) {
|
||||
continue;
|
||||
}
|
||||
DumpJsonParser::GetInstance().MatchKernel(parameter_name);
|
||||
auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
|
||||
ShapeVector int_shapes;
|
||||
GetDumpIntShape(item, PRAMATER_OUTPUT_INDEX, trans_flag, NOT_NULL(&int_shapes));
|
||||
auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
|
||||
|
||||
std::string file_path = dump_path + '/' + parameter_name + '_' + "output_0";
|
||||
if (IsDeviceTargetGPU()) {
|
||||
DumpGPUMemToFile(file_path, parameter_name, NOT_NULL(addr), trans_flag, int_shapes, type, 0, debugger);
|
||||
} else {
|
||||
DumpMemToFile(file_path, NOT_NULL(addr), trans_flag, int_shapes, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool E2eDumpUtil::DumpData(const session::KernelGraph *graph, Debugger *debugger) {
|
||||
MS_EXCEPTION_IF_NULL(graph);
|
||||
auto &dump_json_parser = DumpJsonParser::GetInstance();
|
||||
dump_json_parser.UpdateDumpIter();
|
||||
auto dump_flag = dump_json_parser.e2e_dump_enabled();
|
||||
if (!dump_flag) {
|
||||
MS_LOG(INFO) << "E2e dump is disabled, skip dump step";
|
||||
return true;
|
||||
}
|
||||
|
||||
if (dump_json_parser.iteration() != 0) {
|
||||
if (dump_json_parser.cur_dump_iter() != dump_json_parser.iteration()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
|
||||
std::string net_name = dump_json_parser.net_name();
|
||||
std::string iterator = std::to_string(dump_json_parser.cur_dump_iter());
|
||||
std::string dump_path = dump_json_parser.path();
|
||||
if (dump_path.back() == '/') {
|
||||
dump_path = dump_path + net_name + '/' + iterator;
|
||||
} else {
|
||||
dump_path = dump_path + '/' + net_name + '/' + iterator;
|
||||
}
|
||||
DumpInput(graph, dump_path, debugger);
|
||||
DumpOutput(graph, dump_path, debugger);
|
||||
DumpParameters(graph, dump_path, debugger);
|
||||
return true;
|
||||
}
|
||||
} // namespace mindspore
|
@ -0,0 +1,48 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_
|
||||
#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_
|
||||
|
||||
#include <string>
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "runtime/device/device_address.h"
|
||||
#ifndef ENABLE_DEBUGGER
|
||||
class Debugger;
|
||||
#endif
|
||||
namespace mindspore {
|
||||
class E2eDumpUtil {
|
||||
public:
|
||||
E2eDumpUtil() = default;
|
||||
~E2eDumpUtil() = default;
|
||||
static bool DumpData(const session::KernelGraph *graph, Debugger *debugger = nullptr);
|
||||
|
||||
private:
|
||||
static void DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
|
||||
static void DumpInput(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
|
||||
static void DumpParameters(const session::KernelGraph *graph, const std::string &dump_path, Debugger *debugger);
|
||||
|
||||
static void GetFileKernelName(NotNull<std::string *> kernel_name);
|
||||
static void DumpMemToFile(const std::string &file_path, NotNull<const device::DeviceAddress *> addr, bool trans_flag,
|
||||
const ShapeVector &int_shapes, const TypeId &type);
|
||||
static void DumpGPUMemToFile(const std::string &file_path, const std::string &original_kernel_name,
|
||||
NotNull<const device::DeviceAddress *> addr, bool trans_flag,
|
||||
const ShapeVector &int_shapes, const TypeId &type, size_t slot, Debugger *debugger);
|
||||
static void GetDumpIntShape(const AnfNodePtr &node, size_t index, bool trans_flag, NotNull<ShapeVector *> int_shapes);
|
||||
static bool IsDeviceTargetGPU();
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_E_2_E_DUMP_UTIL_H_
|
@ -1,236 +0,0 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "debug/data_dump_parser.h"
|
||||
|
||||
#include <fstream>
|
||||
#include "utils/ms_context.h"
|
||||
#include "debug/common.h"
|
||||
|
||||
static constexpr auto kDataDumpConfigPtah = "DATA_DUMP_CONFIG_PATH";
|
||||
static constexpr auto kEnableDataDump = "ENABLE_DATA_DUMP";
|
||||
static constexpr auto kDataDumpPath = "DATA_DUMP_PATH";
|
||||
static constexpr auto kConfigDumpMode = "dump_mode";
|
||||
static constexpr auto kConfigOpDebugMode = "op_debug_mode";
|
||||
static constexpr auto kConfigNetName = "net_name";
|
||||
static constexpr auto kConfigIteration = "iteration";
|
||||
static constexpr auto kConfigKernels = "kernels";
|
||||
|
||||
namespace mindspore {
|
||||
void DataDumpParser::ResetParam() {
|
||||
enable_ = false;
|
||||
net_name_.clear();
|
||||
dump_mode_ = 0;
|
||||
dump_step_ = 0;
|
||||
kernel_map_.clear();
|
||||
}
|
||||
|
||||
bool DataDumpParser::DumpEnabled() const {
|
||||
auto enable_dump = std::getenv(kEnableDataDump);
|
||||
if (enable_dump == nullptr) {
|
||||
MS_LOG(INFO) << "[DataDump] enable dump is null. If you want to dump data, please export ENABLE_DATA_DUMP";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto enabled = std::atoi(enable_dump);
|
||||
if (enabled != 1) {
|
||||
MS_LOG(WARNING) << "[DataDump] If you want to dump data, please export ENABLE_DATA_DUMP=1";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
if (context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] PyNative mode not support data dump";
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<std::string> DataDumpParser::GetDumpPath() const {
|
||||
auto dump_path = std::getenv(kDataDumpPath);
|
||||
if (dump_path == nullptr) {
|
||||
MS_LOG(ERROR) << "[DataDump] dump path is null. If you want to dump data, please export DATA_DUMP_PATH";
|
||||
return {};
|
||||
}
|
||||
std::string dump_path_str(dump_path);
|
||||
if (!std::all_of(dump_path_str.begin(), dump_path_str.end(),
|
||||
[](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '/'; })) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] dump path only support alphabets, digit or {'-', '_', '/'}, but got:"
|
||||
<< dump_path_str;
|
||||
}
|
||||
return dump_path_str;
|
||||
}
|
||||
|
||||
std::string GetIfstreamString(const std::ifstream &ifstream) {
|
||||
std::stringstream buffer;
|
||||
buffer << ifstream.rdbuf();
|
||||
return buffer.str();
|
||||
}
|
||||
|
||||
void DataDumpParser::ParseDumpConfig() {
|
||||
std::lock_guard<std::mutex> guard(lock_);
|
||||
MS_LOG(INFO) << "[DataDump] parse start";
|
||||
if (!DumpEnabled()) {
|
||||
MS_LOG(INFO) << "[DataDump] dump not enable";
|
||||
return;
|
||||
}
|
||||
|
||||
ResetParam();
|
||||
|
||||
auto dump_config_file = Common::GetConfigFile(kDataDumpConfigPtah);
|
||||
if (!dump_config_file.has_value()) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] Get config file failed";
|
||||
}
|
||||
|
||||
std::ifstream json_file(dump_config_file.value());
|
||||
if (!json_file.is_open()) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] " << dump_config_file.value() << " open failed.";
|
||||
}
|
||||
|
||||
nlohmann::json j;
|
||||
try {
|
||||
json_file >> j;
|
||||
} catch (nlohmann::json::parse_error &e) {
|
||||
MS_LOG(ERROR) << "[DataDump] json contents:" << GetIfstreamString(json_file);
|
||||
MS_LOG(EXCEPTION) << "[DataDump] parse json failed, error:" << e.what();
|
||||
}
|
||||
if (j.find("DumpSettings") == j.end()) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] DumpSettings is not exist.";
|
||||
}
|
||||
|
||||
nlohmann::json dump_settings = j.at("DumpSettings");
|
||||
// convert json to string
|
||||
std::stringstream ss;
|
||||
ss << dump_settings;
|
||||
std::string cfg = ss.str();
|
||||
MS_LOG(INFO) << "[DataDump] Async dump settings Json: " << cfg;
|
||||
if (!IsConfigExist(dump_settings)) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] Async dump json invalid";
|
||||
}
|
||||
|
||||
if (!ParseDumpSetting(dump_settings)) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] Parse dump json failed";
|
||||
}
|
||||
}
|
||||
|
||||
bool DataDumpParser::NeedDump(const std::string &op_full_name) const {
|
||||
if (!DumpEnabled()) {
|
||||
return false;
|
||||
}
|
||||
if (dump_mode_ == 0) {
|
||||
return true;
|
||||
}
|
||||
auto iter = kernel_map_.find(op_full_name);
|
||||
return iter != kernel_map_.end();
|
||||
}
|
||||
|
||||
bool CheckConfigKey(const nlohmann::json &dump_settings, const std::string &key) {
|
||||
if (dump_settings.find(key) == dump_settings.end()) {
|
||||
MS_LOG(ERROR) << "[DataDump] DumpSettings key:" << key << " is not exist.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DataDumpParser::IsConfigExist(const nlohmann::json &dump_settings) const {
|
||||
return CheckConfigKey(dump_settings, kConfigDumpMode) && CheckConfigKey(dump_settings, kConfigNetName) &&
|
||||
CheckConfigKey(dump_settings, kConfigOpDebugMode) && CheckConfigKey(dump_settings, kConfigIteration) &&
|
||||
CheckConfigKey(dump_settings, kConfigKernels);
|
||||
}
|
||||
|
||||
bool DataDumpParser::ParseDumpSetting(const nlohmann::json &dump_settings) {
|
||||
auto mode = dump_settings.at(kConfigDumpMode);
|
||||
auto op_debug_mode = dump_settings.at(kConfigOpDebugMode);
|
||||
auto net_name = dump_settings.at(kConfigNetName);
|
||||
auto iteration = dump_settings.at(kConfigIteration);
|
||||
auto kernels = dump_settings.at(kConfigKernels);
|
||||
if (!(mode.is_number_unsigned() && op_debug_mode.is_number_unsigned() && net_name.is_string() &&
|
||||
iteration.is_number_unsigned() && kernels.is_array())) {
|
||||
MS_LOG(ERROR) << "[DataDump] Element's type in Dump config json is invalid.";
|
||||
enable_ = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
CheckDumpMode(mode);
|
||||
CheckOpDebugMode(op_debug_mode);
|
||||
|
||||
enable_ = true;
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
dump_mode_ = mode;
|
||||
op_debug_mode_ = op_debug_mode;
|
||||
net_name_ = net_name;
|
||||
dump_step_ = iteration;
|
||||
for (const auto &kernel : kernels) {
|
||||
auto kernel_str = kernel.dump();
|
||||
kernel_str.erase(std::remove(kernel_str.begin(), kernel_str.end(), '\"'), kernel_str.end());
|
||||
MS_LOG(INFO) << "[DataDump] Need dump kernel:" << kernel_str;
|
||||
kernel_map_.insert({kernel_str, 0});
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void DataDumpParser::MatchKernel(const std::string &kernel_name) {
|
||||
auto iter = kernel_map_.find(kernel_name);
|
||||
if (iter == kernel_map_.end()) {
|
||||
return;
|
||||
}
|
||||
iter->second = iter->second + 1;
|
||||
MS_LOG(INFO) << "Match dump kernel:" << iter->first << " match times:" << iter->second;
|
||||
}
|
||||
|
||||
void DataDumpParser::PrintUnusedKernel() {
|
||||
for (const auto &iter : kernel_map_) {
|
||||
if (iter.second == 0) {
|
||||
MS_LOG(WARNING) << "[DataDump] Unused Kernel in json:" << iter.first;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DataDumpParser::CheckDumpMode(uint32_t dump_mode) const {
|
||||
if (dump_mode != 0 && dump_mode != 1) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] dump_mode in config json should be 0 or 1";
|
||||
}
|
||||
}
|
||||
|
||||
void DataDumpParser::CheckOpDebugMode(uint32_t op_debug_mode) const {
|
||||
if (op_debug_mode < 0 || op_debug_mode > 3) {
|
||||
MS_LOG(EXCEPTION) << "[DataDump] op_debug_mode in config json file should be [0-3]";
|
||||
}
|
||||
}
|
||||
|
||||
std::string DataDumpParser::GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const {
|
||||
std::string bin_path = "/var/log/npu/ide_daemon/dump";
|
||||
|
||||
const char *dump_data_path = std::getenv("DATA_DUMP_PATH");
|
||||
if (dump_data_path != nullptr) {
|
||||
bin_path.append(dump_data_path);
|
||||
bin_path.append("_");
|
||||
}
|
||||
bin_path.append(std::to_string(device_id));
|
||||
bin_path.append("/");
|
||||
bin_path.append(net_name_);
|
||||
bin_path.append("_");
|
||||
bin_path.append(std::to_string(graph_id));
|
||||
bin_path.append("/");
|
||||
bin_path.append(std::to_string(dump_mode_));
|
||||
bin_path.append("/");
|
||||
bin_path.append(std::to_string(dump_step_));
|
||||
bin_path.append("/");
|
||||
|
||||
return bin_path;
|
||||
}
|
||||
} // namespace mindspore
|
@ -1,67 +0,0 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
|
||||
#define MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "utils/ms_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
class DataDumpParser {
|
||||
public:
|
||||
static DataDumpParser &GetInstance() {
|
||||
static DataDumpParser instance;
|
||||
return instance;
|
||||
}
|
||||
void ParseDumpConfig();
|
||||
bool NeedDump(const std::string &op_full_name) const;
|
||||
bool DumpEnabled() const;
|
||||
std::optional<std::string> GetDumpPath() const;
|
||||
bool enable() const { return enable_; }
|
||||
const std::string &net_name() const { return net_name_; }
|
||||
uint32_t dump_mode() const { return dump_mode_; }
|
||||
uint32_t op_debug_mode() const { return op_debug_mode_; }
|
||||
uint32_t dump_step() const { return dump_step_; }
|
||||
void MatchKernel(const std::string &kernel_name);
|
||||
void PrintUnusedKernel();
|
||||
std::string GetOpOverflowBinPath(uint32_t graph_id, uint32_t device_id) const;
|
||||
|
||||
private:
|
||||
DataDumpParser() = default;
|
||||
virtual ~DataDumpParser() = default;
|
||||
DISABLE_COPY_AND_ASSIGN(DataDumpParser);
|
||||
|
||||
void ResetParam();
|
||||
bool IsConfigExist(const nlohmann::json &dump_settings) const;
|
||||
bool ParseDumpSetting(const nlohmann::json &dump_settings);
|
||||
void CheckDumpMode(uint32_t dump_mode) const;
|
||||
void CheckOpDebugMode(uint32_t op_debug_mode) const;
|
||||
|
||||
std::mutex lock_;
|
||||
bool enable_{false};
|
||||
std::string net_name_;
|
||||
uint32_t op_debug_mode_{0};
|
||||
uint32_t dump_mode_{0};
|
||||
uint32_t dump_step_{0};
|
||||
std::map<std::string, uint32_t> kernel_map_;
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_DEBUG_ASYNC_DUMP_JSON_PARE_H_
|
@ -1,178 +0,0 @@
|
||||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "debug/e2e_dump.h"
|
||||
#include <limits.h>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <optional>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "utils/log_adapter.h"
|
||||
#include "utils/system/file_system.h"
|
||||
#include "utils/system/env.h"
|
||||
#include "utils/convert_utils.h"
|
||||
#include "utils/ms_context.h"
|
||||
#include "debug/common.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace mindspore {
|
||||
Dump::Dump()
|
||||
: dump_enable_(false),
|
||||
trans_flag_(false),
|
||||
dump_path_("/tmp/"),
|
||||
dump_net_name_("net_name"),
|
||||
dump_mode_(0),
|
||||
dump_iter_(0),
|
||||
cur_iter_(0) {}
|
||||
|
||||
bool Dump::IsKernelNeedDump(const std::string &kernel_name) {
|
||||
if (dump_mode_ == 0) {
|
||||
// Dump All Kernels mode
|
||||
return true;
|
||||
} else {
|
||||
auto iter = std::find(dump_kernels_.begin(), dump_kernels_.end(), kernel_name);
|
||||
if (iter != dump_kernels_.end()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Dump::ParseDumpConfig(const std::string &dump_config_file) {
|
||||
std::ifstream jsonFile(dump_config_file);
|
||||
if (!jsonFile.is_open()) {
|
||||
MS_LOG(ERROR) << dump_config_file << " open failed.";
|
||||
dump_enable_ = false;
|
||||
return false;
|
||||
}
|
||||
json j;
|
||||
jsonFile >> j;
|
||||
if (j.find("DumpSettings") == j.end()) {
|
||||
MS_LOG(ERROR) << "DumpSettings is not exist.";
|
||||
dump_enable_ = false;
|
||||
return false;
|
||||
} else {
|
||||
json dumpSettings = j.at("DumpSettings");
|
||||
// convert json to string
|
||||
std::stringstream ss;
|
||||
ss << dumpSettings;
|
||||
std::string cfg = ss.str();
|
||||
MS_LOG(INFO) << "E2E Dump Settings Json: " << cfg;
|
||||
if (!IsConfigExist(dumpSettings)) {
|
||||
return false;
|
||||
}
|
||||
if (!IsConfigValid(dumpSettings)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check that every key the e2e dump feature depends on is present in the
// "DumpSettings" json object. Missing keys disable dumping and return false.
bool Dump::IsConfigExist(const nlohmann::json &dumpSettings) {
  static const char *const kRequiredKeys[] = {"trans_flag", "enable",    "mode",   "path",
                                              "net_name",   "iteration", "kernels"};
  for (const auto *required_key : kRequiredKeys) {
    if (dumpSettings.find(required_key) == dumpSettings.end()) {
      MS_LOG(ERROR) << "DumpSettings keys is not exist.";
      dump_enable_ = false;
      return false;
    }
  }
  return true;
}
|
||||
|
||||
// Type-check every "DumpSettings" field and, when valid, copy the values into
// the member settings. Invalid types disable dumping and return false.
bool Dump::IsConfigValid(const nlohmann::json &dumpSettings) {
  auto enable = dumpSettings.at("enable");
  auto trans_flag = dumpSettings.at("trans_flag");
  auto mode = dumpSettings.at("mode");
  auto path = dumpSettings.at("path");
  auto net_name = dumpSettings.at("net_name");
  auto iteration = dumpSettings.at("iteration");
  auto kernels = dumpSettings.at("kernels");
  const bool types_ok = enable.is_boolean() && trans_flag.is_boolean() && mode.is_number() && path.is_string() &&
                        net_name.is_string() && iteration.is_number() && kernels.is_array();
  if (!types_ok) {
    MS_LOG(ERROR) << "Element's type in Dump config json is invalid.";
    dump_enable_ = false;
    return false;
  }

  dump_enable_ = enable;
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  // dump_enable_ is true, close mem reuse
  context_ptr->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, !dump_enable_);
  trans_flag_ = trans_flag;
  dump_mode_ = mode;
  dump_path_ = path;
  dump_net_name_ = net_name;
  dump_iter_ = iteration;
  for (const auto &kernel : kernels) {
    dump_kernels_.push_back(kernel);
  }
  return true;
}
|
||||
|
||||
bool Dump::SetDumpConfFromJsonFile() {
|
||||
const char *config_path_str = std::getenv("MINDSPORE_CONFIG_PATH");
|
||||
if (config_path_str != nullptr) {
|
||||
MS_LOG(INFO) << "Getenv MINDSPORE_CONFIG_PATH :" << config_path_str;
|
||||
} else {
|
||||
MS_LOG(INFO) << "No need E2E Dump. please export MINDSPORE_CONFIG_PATH eg: MINDSPORE_CONFIG_PATH=/etc";
|
||||
dump_enable_ = false;
|
||||
return false;
|
||||
}
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
auto id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||
char real_path[PATH_MAX] = {0};
|
||||
if (nullptr == realpath(config_path_str, real_path)) {
|
||||
MS_LOG(ERROR) << "Env e2e dump path error, " << config_path_str;
|
||||
dump_enable_ = false;
|
||||
return false;
|
||||
}
|
||||
std::string dump_config_file = std::string(real_path) + "/e2e_dump_config_" + std::to_string(id) + ".json";
|
||||
std::shared_ptr<system::FileSystem> fs = system::Env::GetFileSystem();
|
||||
MS_EXCEPTION_IF_NULL(fs);
|
||||
if (!fs->FileExist(dump_config_file)) {
|
||||
MS_LOG(ERROR) << dump_config_file << " not exist.";
|
||||
dump_enable_ = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
return ParseDumpConfig(dump_config_file);
|
||||
}
|
||||
|
||||
bool Dump::DumpToFile(const std::string &filename, const void *data, size_t len) {
|
||||
if (filename.empty() || data == nullptr || len == 0) {
|
||||
MS_LOG(ERROR) << "Incorrect parameter.";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto realpath = Common::GetRealPath(filename);
|
||||
if (!realpath.has_value()) {
|
||||
MS_LOG(ERROR) << "Get real path failed.";
|
||||
return false;
|
||||
}
|
||||
std::ofstream fd;
|
||||
fd.open(realpath.value(), std::ios::binary | std::ios::out);
|
||||
if (!fd.is_open()) {
|
||||
MS_LOG(ERROR) << "Open file " << realpath.value() << " fail.";
|
||||
return false;
|
||||
}
|
||||
(void)fd.write(reinterpret_cast<const char *>(data), SizeToLong(len));
|
||||
fd.close();
|
||||
return true;
|
||||
}
|
||||
} // namespace mindspore
|
@ -1,70 +0,0 @@
|
||||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_E2E_DUMP_H
|
||||
#define MINDSPORE_E2E_DUMP_H
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace mindspore {
|
||||
// Holds the e2e dump configuration (loaded from a per-device json file) and
// answers whether a given kernel should be dumped at the current iteration.
class Dump {
 public:
  Dump();

  ~Dump() = default;

  // True when dumping was enabled by the loaded configuration.
  bool dump_enable() const { return dump_enable_; }

  // "trans_flag" from the config; semantics defined by the dump consumers —
  // presumably controls format/layout translation of dumped tensors (verify).
  bool trans_flag() const { return trans_flag_; }

  // Root directory for dump output.
  std::string dump_path() const { return dump_path_; }

  // Network name used in dump file naming.
  std::string dump_net_name() const { return dump_net_name_; }

  // Iteration selected for dumping by the config.
  uint32_t dump_iter() const { return dump_iter_; }

  // Advance the iteration counter by one.
  // NOTE(review): "Updata" is a typo for "Update", but renaming would break
  // external callers of this public method.
  void UpdataCurIter() { cur_iter_++; }

  // Iteration counter advanced via UpdataCurIter().
  uint32_t cur_iter() const { return cur_iter_; }

  // True when |kernel_name| (full scope name) should be dumped under the
  // current dump mode (all kernels, or the configured allow-list).
  bool IsKernelNeedDump(const std::string &kernel_name);

  // Locate the config file via MINDSPORE_CONFIG_PATH and load it.
  bool SetDumpConfFromJsonFile();

  // Write |len| bytes at |data| to |filename|; returns false on failure.
  static bool DumpToFile(const std::string &filename, const void *data, size_t len);

 protected:
  bool dump_enable_;                       // dumping on/off ("enable")
  bool trans_flag_;                        // "trans_flag" from config
  std::string dump_path_;                  // "path" from config
  std::string dump_net_name_;              // "net_name" from config
  uint32_t dump_mode_;                     // 0 = all kernels, else allow-list
  uint32_t dump_iter_;                     // "iteration" from config
  uint32_t cur_iter_;                      // current iteration counter
  std::vector<std::string> dump_kernels_;  // "kernels" allow-list

 private:
  // Parse and validate the json config file; disables dumping on failure.
  bool ParseDumpConfig(const std::string &dump_config_file);
  // Verify all required keys exist in the "DumpSettings" object.
  bool IsConfigExist(const nlohmann::json &dumpSettings);
  // Type-check fields and copy them into the members above.
  bool IsConfigValid(const nlohmann::json &dumpSettings);
};
|
||||
|
||||
using DumpConfPtr = std::shared_ptr<Dump>;
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_E2E_DUMP_H
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue