Profiling AR version 3

pull/576/head
taoxiangdong 4 years ago
parent 62b3e8eeb4
commit 612c82f405

@ -22,6 +22,7 @@
#include "graph/load/graph_loader.h" #include "graph/load/graph_loader.h"
#include "init/gelib.h" #include "init/gelib.h"
#include "framework/common/ge_inner_error_codes.h" #include "framework/common/ge_inner_error_codes.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
namespace { namespace {
const uint32_t kDeviceListIndex = 3; const uint32_t kDeviceListIndex = 3;
@ -44,7 +45,7 @@ const std::map<ProfCommandHandleType, std::string> kProfCommandTypeMap = {
{kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}}; {kProfCommandhandleModelUnsubscribe, kProfModelUnsubscribe}};
} // namespace } // namespace
bool TransProfConfigToParam(const MsprofCommandHandle &profCommand, vector<string> &prof_config_params) { bool TransProfConfigToParam(const ProfCommandHandleData &profCommand, vector<string> &prof_config_params) {
prof_config_params.clear(); prof_config_params.clear();
prof_config_params.emplace_back(kDeviceNums); prof_config_params.emplace_back(kDeviceNums);
prof_config_params.emplace_back(std::to_string(profCommand.devNums)); prof_config_params.emplace_back(std::to_string(profCommand.devNums));
@ -71,7 +72,7 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
return false; return false;
} }
if (device_nums == 0 || device_nums > MAX_DEV_NUM) { if (device_nums == 0 || device_nums > MAX_DEV_NUM) {
GELOGE(ge::PARAM_INVALID, "The device nums is invalid."); GELOGE(ge::PARAM_INVALID, "The device nums: %u is invalid.", device_nums);
return false; return false;
} }
@ -105,16 +106,26 @@ bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) {
} }
// Registers the Msprof control callback with the ProfilingManager singleton.
//
// This text is a side-by-side diff rendering: a line holding two statements shows the pre-change
// text on the left and the post-change text on the right; a line holding one statement exists on
// one side of the diff only.
//
// Reading the right-hand column:
//   - a null `func` is logged and rejected with ge::PARAM_INVALID;
//   - if a control callback is already stored, `func` is ignored (warning only);
//   - otherwise `func` is stored via SetMsprofCtrlCallback.
// Both non-null paths return ge::SUCCESS, so the caller cannot tell "stored" from "ignored".
ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) { ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
if (func == nullptr) {
GELOGE(ge::PARAM_INVALID, "Msprof ctrl callback is nullptr.");
return ge::PARAM_INVALID;
}
// First registration wins: an existing callback is never replaced.
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) {
GELOGW("Msprof ctrl callback is exist, just ignore it."); GELOGW("Msprof ctrl callback is exist, just ignore it.");
} else { } else {
GELOGI("GE register Msprof ctrl callback.");
ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func); ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
} }
return ge::SUCCESS; return ge::SUCCESS;
} }
ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) { ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
if (func == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofSetDeviceCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
// Pass MsprofSetDeviceCallback to runtime // Pass MsprofSetDeviceCallback to runtime
GELOGI("GE pass setdevice callback to runtime.");
ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func)); ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func));
if (rt_ret != ge::SUCCESS) { if (rt_ret != ge::SUCCESS) {
GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!"); GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!");
@ -124,12 +135,15 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
} }
// Registers the Msprof reporter callback and forwards it to the runtime
// (rtSetMsprofReporterCallback) and, in the post-change column, to
// OpskernelBuilderManager::RegProfReporterCallback.
//
// This text is a side-by-side diff rendering: a line holding two statements shows the pre-change
// text on the left and the post-change text on the right; a line holding one statement exists on
// one side of the diff only.
//
// Returns ge::PARAM_INVALID for a null `func` (post-change), the failing status when a hand-off
// fails, and ge::SUCCESS otherwise.
ge::Status RegProfReporterCallback(MsprofReporterCallback func) { ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
if (func == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
// NOTE(review): this condition reads msprofCtrlCallback, yet the branch logs about and stores the
// *reporter* callback. msprofReporterCallback looks like the intended field; as written, the
// reporter is skipped whenever a control callback is already registered — confirm and fix.
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) { if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) {
GELOGW("Msprof reporter callback is exist, just ignore it."); GELOGW("Msprof reporter callback is exist, just ignore it.");
} else { } else {
GELOGI("GE register Msprof reporter callback."); GELOGI("GE register Msprof reporter callback.");
ge::ProfilingManager::Instance().SetMsprofReporterCallback(func); ge::ProfilingManager::Instance().SetMsprofReporterCallback(func);
// NOTE(review): the lone brace below appears to be the pre-change end of the else branch; in the
// post-change column the else branch seems to extend down to the lone brace just before the final
// return, which would place both hand-offs inside it — confirm against the real file.
}
// Pass MsprofReporterCallback to runtime // Pass MsprofReporterCallback to runtime
ge::Status rt_ret = rtSetMsprofReporterCallback(func); ge::Status rt_ret = rtSetMsprofReporterCallback(func);
if (rt_ret != ge::SUCCESS) { if (rt_ret != ge::SUCCESS) {
// Hunk boundary: the previous hunk ends at new-file line 149 and the next starts at 151, so one
// line of this function is not shown in this view.
@ -137,41 +151,55 @@ ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
return rt_ret; return rt_ret;
} }
// Pass MsprofReporterCallback to hccl in opskernel so initialize // Pass MsprofReporterCallback to hccl in opskernel so initialize
rt_ret = OpskernelBuilderManager::Instance().RegProfReporterCallback(func);
if (rt_ret != ge::SUCCESS) {
GELOGE(rt_ret, "Pass MsprofReporterCallback to hccl failed.");
return rt_ret;
}
}
return ge::SUCCESS; return ge::SUCCESS;
} }
// Handles one profiling command: maps `type` to a command string through kProfCommandTypeMap,
// fills a ge::Command and passes it to ge::GraphLoader::CommandHandle.
//
// This text is a side-by-side diff rendering: a line holding two statements shows the pre-change
// text on the left and the post-change text on the right; a line holding one statement exists on
// one side of the diff only (so some statements below appear twice, once per version).
//
// Reading the right-hand (post-change) column, the flow appears to be:
//   1. `data` goes through GE_CHECK_NOTNULL unless `type` is kProfCommandhandleFinalize
//      (presumably the macro returns an error on null — confirm its definition);
//   2. a `type` missing from kProfCommandTypeMap logs a warning and returns ge::PARAM_INVALID;
//   3. for Start/Stop only, the device list is validated (ge::FAILED on failure) and translated
//      into prof_params (ge::PARAM_INVALID on failure);
//   4. command.module_index is taken from profSwitch unless `type` is Finalize;
//   5. a failing CommandHandle returns ge::FAILED; otherwise ge::SUCCESS.
//
// NOTE(review): `len` is not read anywhere in the visible body, so the size of `data` is never
// checked before it is cast and dereferenced.
// NOTE(review): the post-change cast uses MsprofCommandHandleData, while the post-change
// TransProfConfigToParam signature shown earlier in this diff takes ProfCommandHandleData —
// confirm both names refer to the same type.
// NOTE(review): for Finalize, command.module_index is logged without being assigned in this
// function; the logged value then depends on how ge::Command initializes it — confirm.
ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) { ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t len) {
if (type != kProfCommandhandleFinalize) {
GE_CHECK_NOTNULL(data); GE_CHECK_NOTNULL(data);
MsprofCommandHandle *prof_config_param = (MsprofCommandHandle *)data; }
MsprofCommandHandleData *prof_config_param = (MsprofCommandHandleData *)data;
auto iter = kProfCommandTypeMap.find(type);
if (iter == kProfCommandTypeMap.end()) {
GELOGW("The prof comand type is invalid.");
return ge::PARAM_INVALID;
}
std::vector<string> prof_params;
// Only Start/Stop dereference prof_config_param for the device list.
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) { if (!isProfConfigValid(prof_config_param->devIdList, prof_config_param->devNums)) {
return ge::FAILED; return ge::FAILED;
} }
std::vector<string> prof_params;
if (!TransProfConfigToParam(*prof_config_param, prof_params)) { if (!TransProfConfigToParam(*prof_config_param, prof_params)) {
GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed"); GELOGE(ge::PARAM_INVALID, "Transfer profilerConfig to string vector failed");
return ge::PARAM_INVALID; return ge::PARAM_INVALID;
} }
auto iter = kProfCommandTypeMap.find(type);
if (iter == kProfCommandTypeMap.end()) {
GELOGW("The prof comand type is invalid.");
return ge::PARAM_INVALID;
}
ge::GraphLoader graph_loader; ge::GraphLoader graph_loader;
ge::Command command; ge::Command command;
command.cmd_params.clear(); command.cmd_params.clear();
command.cmd_type = iter->second; command.cmd_type = iter->second;
command.cmd_params = prof_params; command.cmd_params = prof_params;
if (type != kProfCommandhandleFinalize) {
command.module_index = prof_config_param->profSwitch; command.module_index = prof_config_param->profSwitch;
GELOGI("GE commandhandle execute, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), }
prof_params[kDeviceListIndex].c_str(), command.module_index); GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index);
// prof_params is only populated for Start/Stop, hence the guard before indexing into it.
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
}
ge::Status ret = graph_loader.CommandHandle(command); ge::Status ret = graph_loader.CommandHandle(command);
if (ret != ge::SUCCESS) { if (ret != ge::SUCCESS) {
GELOGE(ret, "Handle profiling command failed"); GELOGE(ret, "Handle profiling command failed");
return ge::FAILED; return ge::FAILED;
} }
GELOGI("Successfully execute profiling command 0x%llx.", command.module_index); GELOGI("Successfully execute profiling command type: %d, command 0x%llx.", type, command.module_index);
return ge::SUCCESS; return ge::SUCCESS;
} }

@ -22,6 +22,7 @@
#include "graph/ge_context.h" #include "graph/ge_context.h"
#include "runtime/base.h" #include "runtime/base.h"
#include "graph/load/new_model_manager/davinci_model.h" #include "graph/load/new_model_manager/davinci_model.h"
#include "opskernel_manager/ops_kernel_builder_manager.h"
namespace { namespace {
const char *const kTrainingTrace = "training_trace"; const char *const kTrainingTrace = "training_trace";
@ -41,7 +42,10 @@ namespace ge {
// Default constructor: clears the three profiling state flags and zeroes subscribe_count_.
// Side-by-side diff rendering: the left statement on each line is the pre-change text (empty
// body), the right one the post-change text, whose body also sets both callback pointers in
// prof_cb_ to nullptr.
ProfilingManager::ProfilingManager() : is_load_profiling_(false), ProfilingManager::ProfilingManager() : is_load_profiling_(false),
is_execute_profiling_(false), is_execute_profiling_(false),
is_training_trace_(false), is_training_trace_(false),
subscribe_count_(0) {} subscribe_count_(0) {
// Explicit null state for the callbacks; other functions in this diff compare them to nullptr
// before calling through them.
prof_cb_.msprofCtrlCallback = nullptr;
prof_cb_.msprofReporterCallback = nullptr;
}
ProfilingManager::~ProfilingManager() {} ProfilingManager::~ProfilingManager() {}
@ -64,6 +68,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
} }
if (is_execute_profiling_) { if (is_execute_profiling_) {
if (prof_cb_.msprofCtrlCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
int32_t cb_ret = prof_cb_.msprofCtrlCallback( int32_t cb_ret = prof_cb_.msprofCtrlCallback(
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS), static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS),
static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions)); static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions));
@ -116,7 +124,7 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
// Parse json str for bp fp // Parse json str for bp fp
Status ret = ParseOptions(prof_conf.options); Status ret = ParseOptions(prof_conf.options);
if (ret != ge::SUCCESS) { if (ret != ge::SUCCESS) {
GELOGE(ge::PARAM_INVALID, "Parse taining trace param failed."); GELOGE(ge::PARAM_INVALID, "Parse training trace param failed.");
return ge::PARAM_INVALID; return ge::PARAM_INVALID;
} }
@ -182,6 +190,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
} }
// stop profiling // stop profiling
// Guard added by this commit: skip the finalize call when no control callback is registered.
// NOTE(review): the hunk header directly above this fragment shows the enclosing function as
// `void ProfilingManager::StopProf...`, so `return ge::PARAM_INVALID;` returns a value from a
// void function and should not compile; a plain `return;` looks intended — confirm.
if (prof_cb_.msprofCtrlCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofCtrlCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
// Calls the control callback with MSPROF_CTRL_FINALIZE, a null data pointer and length 0.
int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE), int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE),
nullptr, 0); nullptr, 0);
if (cb_ret != 0) { if (cb_ret != 0) {
@ -210,7 +222,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
.append(std::to_string(stream_id)).append(" ") .append(std::to_string(stream_id)).append(" ")
.append(std::to_string(model_id)).append("\n")); .append(std::to_string(model_id)).append("\n"));
ReporterData reporter_data; ReporterData reporter_data{};
reporter_data.deviceId = device_id; reporter_data.deviceId = device_id;
reporter_data.data = (unsigned char *)data.c_str(); reporter_data.data = (unsigned char *)data.c_str();
reporter_data.dataLen = data.size(); reporter_data.dataLen = data.size();
@ -298,7 +310,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::Profilin
void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) { void ProfilingManager::GraphDescReport(const int32_t &device_id, const string &data) {
#ifdef DAVINCI_SUPPORT_PROFILING #ifdef DAVINCI_SUPPORT_PROFILING
ReporterData reporter_data; ReporterData reporter_data{};
int ret = -1; int ret = -1;
int32_t cb_ret = -1; int32_t cb_ret = -1;
size_t index = data.size() / kReportMaxLen; size_t index = data.size() / kReportMaxLen;
@ -487,6 +499,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfIn
GELOGE(FAILED, "Runtime profiler start failed."); GELOGE(FAILED, "Runtime profiler start failed.");
return FAILED; return FAILED;
} }
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(model_load_mask);
if (hccl_ret != SUCCESS) {
GELOGE(FAILED, "Hccl profiler start failed.");
return FAILED;
}
is_load_profiling_ = true; is_load_profiling_ = true;
GELOGI("Prof init: model load profiling on."); GELOGI("Prof init: model load profiling on.");
} }
@ -517,6 +534,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi
return FAILED; return FAILED;
} }
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(PROF_MODEL_LOAD_MASK);
if (hccl_ret != SUCCESS) {
GELOGE(FAILED, "Hccl profiler stop failed.");
return FAILED;
}
for (auto device_id_module : device_id_module_map_) { for (auto device_id_module : device_id_module_map_) {
if (device_id_module.second != 0) { if (device_id_module.second != 0) {
uint32_t device_id = static_cast<uint32_t>(device_id_module.first); uint32_t device_id = static_cast<uint32_t>(device_id_module.first);
@ -640,6 +662,12 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
GELOGE(FAILED, "Runtime profiler config proc failed."); GELOGE(FAILED, "Runtime profiler config proc failed.");
return FAILED; return FAILED;
} }
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStart(module);
if (hccl_ret != SUCCESS) {
GELOGE(FAILED, "Hccl profiler start failed.");
return FAILED;
}
if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) { if ((module & PROF_MODEL_EXECUTE_MASK) == PROF_MODEL_EXECUTE_MASK) {
for (int32_t i = 0; i < device_num; i++) { for (int32_t i = 0; i < device_num; i++) {
if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) { if (std::find(device_id_.begin(), device_id_.end(), device_list[i]) == device_id_.end()) {
@ -681,6 +709,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfSt
GELOGE(FAILED, "Prof stop: runtime profiler config proc failed."); GELOGE(FAILED, "Prof stop: runtime profiler config proc failed.");
return FAILED; return FAILED;
} }
Status hccl_ret = OpskernelBuilderManager::Instance().ProfStop(module);
if (hccl_ret != SUCCESS) {
GELOGE(FAILED, "Hccl profiler stop failed.");
return FAILED;
}
uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK; uint64_t execute_model_mask = module & PROF_MODEL_EXECUTE_MASK;
if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) { if (execute_model_mask == PROF_MODEL_EXECUTE_MASK) {
for (int32_t i = 0; i < device_num; i++) { for (int32_t i = 0; i < device_num; i++) {
@ -749,6 +782,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin
} }
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const {
if (prof_cb_.msprofReporterCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
return prof_cb_.msprofReporterCallback( return prof_cb_.msprofReporterCallback(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT), static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_INIT),
@ -757,6 +794,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::Plugin
// Opening of PluginUnInit; the rest of the function lies beyond this diff hunk.
// The body is compiled only when DAVINCI_SUPPORT_PROFILING is defined.
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const { FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUnInit() const {
#ifdef DAVINCI_SUPPORT_PROFILING #ifdef DAVINCI_SUPPORT_PROFILING
// Guard added by this commit: bail out when no reporter callback is registered.
// NOTE(review): this function is declared `void`, so `return ge::PARAM_INVALID;` returns a value
// from a void function and should not compile when DAVINCI_SUPPORT_PROFILING is defined; a plain
// `return;` looks intended — confirm.
if (prof_cb_.msprofReporterCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
int32_t cb_ret = prof_cb_.msprofReporterCallback( int32_t cb_ret = prof_cb_.msprofReporterCallback(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT), static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_UNINIT),
@ -769,6 +810,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::PluginUn
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport( FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMsprofReport(
ReporterData &reporter_data) const { ReporterData &reporter_data) const {
if (prof_cb_.msprofReporterCallback == nullptr) {
GELOGE(ge::PARAM_INVALID, "MsprofReporterCallback callback is nullptr.");
return ge::PARAM_INVALID;
}
return prof_cb_.msprofReporterCallback( return prof_cb_.msprofReporterCallback(
static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK), static_cast<uint32_t>(MsprofReporterModuleId::MSPROF_MODULE_FRAMEWORK),
static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT), static_cast<uint32_t>(MsprofReporterCallbackType::MSPROF_REPORTER_REPORT),
@ -806,8 +851,9 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
} }
} catch (...) { } catch (...) {
GELOGE(FAILED, "Json prof options is invalid."); GELOGE(FAILED, "Json prof options is invalid.");
return ge::PARAM_INVALID; return;
} }
return;
} }

@ -35,6 +35,23 @@ using Json = nlohmann::json;
// File-local constants. Side-by-side diff rendering: lines holding two statements show the
// pre-change text on the left and the post-change text on the right; the mask macros below
// appear once and are preceded by no removed lines, so they look like additions of this commit.
namespace { namespace {
const std::string GE_PROFILING_MODULE = "Framework"; const std::string GE_PROFILING_MODULE = "Framework";
// DataTypeConfig MASK
// Bit flags for the profiling data-type configuration; every value below has exactly one bit set.
// NOTE(review): these are preprocessor macros, so the enclosing anonymous namespace does not
// scope them — they stay defined for all code that follows. If this file is a header, that
// includes every translation unit that includes it — confirm no other definition clashes.
// NOTE(review): 0x4000 is not assigned to any mask in this list.
#define PROF_ACL_API_MASK 0x0001
#define PROF_TASK_TIME_MASK 0x0002
#define PROF_AICORE_METRICS_MASK 0x0004
#define PROF_AICPU_TRACE_MASK 0x0008
#define PROF_MODEL_EXECUTE_MASK 0x0010
#define PROF_RUNTIME_API_MASK 0x0020
#define PROF_RUNTIME_TRACE_MASK 0x0040
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080
#define PROF_SCHEDULE_TRACE_MASK 0x0100
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200
#define PROF_SUBTASK_TIME_MASK 0x0400
#define PROF_TRAINING_TRACE_MASK 0x0800
#define PROF_HCCL_TRACE_MASK 0x1000
#define PROF_DATA_PROCESS_MASK 0x2000
// Bit 63, i.e. the top bit of a 64-bit value; far above the other masks, which end at bit 13.
#define PROF_MODEL_LOAD_MASK 0x8000000000000000
} // namespace } // namespace
namespace ge { namespace ge {
struct DeviceSubsInfo { struct DeviceSubsInfo {

Loading…
Cancel
Save