profiling AR version2

pull/576/head
taoxiangdong 4 years ago
parent 07f5327b18
commit 62b3e8eeb4

@ -648,9 +648,11 @@ target_include_directories(ge_runner PRIVATE
target_link_libraries(ge_runner
$<BUILD_INTERFACE:intf_pub>
-Wl,--whole-archive
msprofiler_fwk
-Wl,--no-whole-archive
ge_memory
adump_server
msprofiler_fwk
static_mmpa
-Wl,--no-as-needed
graph

@ -125,8 +125,9 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
ge::Status RegProfReporterCallback(MsprofReporterCallback func) {
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) {
GELOGW("Msprof ctrl callback is exist, just ignore it.");
GELOGW("Msprof reporter callback is exist, just ignore it.");
} else {
GELOGI("GE register Msprof reporter callback.");
ge::ProfilingManager::Instance().SetMsprofReporterCallback(func);
}
// Pass MsprofReporterCallback to runtime

@ -24,16 +24,9 @@
#include "graph/load/new_model_manager/davinci_model.h"
namespace {
const char *const kJobID = "jobID";
const char *const kDeviceID = "deviceID";
const char *const kStartCfg = "startCfg";
const char *const kFeatures = "features";
const char *const kConf = "conf";
const char *const kEvents = "events";
const char *const kAiCoreEvents = "ai_core_events";
const char *const kName = "name";
const char *const kTraceID = "traceId";
const char *const kProfDir = "resultPath";
const char *const kTrainingTrace = "training_trace";
const char *const kFpPoint = "fp_point";
const char *const kBpPoint = "bp_point";
const size_t kReportMaxLen = 2048;
const int32_t kMaxDeviceNum = 256;
const std::string kConfigNumsdev = "devNums";
@ -70,7 +63,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
return ret;
}
if (is_load_profiling_) {
if (is_execute_profiling_) {
int32_t cb_ret = prof_cb_.msprofCtrlCallback(
static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_INIT_GE_OPTIONS),
static_cast<void *>(&prof_conf), sizeof(MsprofGeOptions));
@ -91,37 +84,42 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
#ifdef DAVINCI_SUPPORT_PROFILING
// enable profiling by env
char env_profiling_mode[MMPA_MAX_PATH] = { 0x00 };
is_load_profiling_ = false;
is_load_profiling_ = false; // Change in ProfInit
is_execute_profiling_ = false;
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, sizeof(MsprofGeOptions));
if ((env_profiling_mode != nullptr) && (strcmp("true", env_profiling_mode) == 0)
&& (strcmp(prof_conf.options, "\0") != 0)) {
// enable profiling by env
is_load_profiling_ = true;
is_execute_profiling_ = true;
GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options);
} else {
if (options.profiling_mode != "1" || options.profiling_options.empty()) {
return SUCCESS;
}
if (options.profiling_mode == "1" && !options.profiling_options.empty()) {
// enable profiling by ge option
if (memcpy_s(prof_conf.options, sizeof(prof_conf.options), options.profiling_options.c_str(),
if (memcpy_s(prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX, options.profiling_options.c_str(),
sizeof(options.profiling_options.c_str())) != EOK) {
GELOGE(INTERNAL_ERROR, "copy profiling_options failed.");
return INTERNAL_ERROR;
}
is_load_profiling_ = true;
is_execute_profiling_ = true;
GELOGI("The profiling in options is %s, %s", options.profiling_mode.c_str(), prof_conf.options);
} else {
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
// The env is invalid
if ((env_profiling_mode == nullptr) || (strcmp("true", env_profiling_mode) != 0)
|| (strcmp(prof_conf.options, "\0") == 0)) {
return SUCCESS;
}
// enable profiling by env
is_execute_profiling_ = true;
GELOGI("The profiling in env is %s, %s", env_profiling_mode, prof_conf.options);
}
if (!is_load_profiling_) {
if (!is_execute_profiling_) {
return SUCCESS;
}
// Parse json str for bp fp
Status ret = ParseOptions(prof_conf.options);
if (ret != ge::SUCCESS) {
GELOGE(ge::PARAM_INVALID, "Parse taining trace param failed.");
return ge::PARAM_INVALID;
}
if (memcpy_s(prof_conf.jobId, sizeof(prof_conf.jobId), options.job_id.c_str(),
sizeof(options.job_id.c_str())) != EOK) {
GELOGE(INTERNAL_ERROR, "copy job_id failed.");
@ -134,23 +132,55 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
return ge::SUCCESS;
}
ge::Status ProfilingManager::ParseOptions(const std::string &options) {
if (options.empty()) {
GELOGE(ge::PARAM_INVALID, "Profiling options is empty.")
return ge::PARAM_INVALID;
}
try {
Json prof_options = Json::parse(options);
const std::string training_trace = prof_options[kTrainingTrace];
if (training_trace.empty()) {
GELOGI("Training trace will not take effect.");
return ge::SUCCESS;
}
GELOGI("GE profiling training trace:%s", training_trace.c_str());
if (training_trace != "on") {
GELOGE(ge::PARAM_INVALID, "Training trace param:%s is invalid.", training_trace.c_str());
return ge::PARAM_INVALID;
}
fp_point = prof_options[kFpPoint];
bp_point = prof_options[kBpPoint];
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
} catch (...) {
GELOGE(FAILED, "Json prof_conf options is invalid.");
return ge::PARAM_INVALID;
}
return ge::SUCCESS;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProfiling() {
#ifdef DAVINCI_SUPPORT_PROFILING
uint64_t module = GetProfilingModule();
// The following if case will not be executed in normal case, inc case of ProfStopProfiling is abnormal
int32_t device_num = static_cast<int32_t>(device_id_.size());
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
if (device_id_ptr == nullptr) {
GELOGE(FAILED, "Stop profiling: device id ptr is null.");
return;
}
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]);
}
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret);
if (device_num != 0) {
auto device_id_ptr = std::unique_ptr<uint32_t[]>(new (std::nothrow) uint32_t[device_num]);
if (device_id_ptr == nullptr) {
GELOGE(FAILED, "Stop profiling: device id ptr is null.");
return;
}
for (int32_t i = 0; i < device_num; i++) {
device_id_ptr[i] = static_cast<uint32_t>(device_id_[i]);
}
rtError_t rt_ret = rtProfilerStop(module, device_num, device_id_ptr.get());
if (rt_ret != RT_ERROR_NONE) {
GELOGW("Call rtProfilerStop failed, ret:%d", rt_ret);
}
}
// stop profiling
int32_t cb_ret = prof_cb_.msprofCtrlCallback(static_cast<uint32_t>(MsprofCtrlCallbackType::MSPROF_CTRL_FINALIZE),
nullptr, 0);
@ -475,6 +505,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::ProfFi
std::lock_guard<std::mutex> lock(mutex_);
is_load_profiling_ = false;
is_training_trace_ = false;
is_execute_profiling_ = false;
// profiling plugin uninit
PluginUnInit();
@ -714,7 +745,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool ProfilingManager::Profilin
execute_model_prof_on = true;
}
GELOGI("Flag is_execute_profiling: %d, execute_model_prof_on: %d", is_execute_profiling_, execute_model_prof_on);
return is_execute_profiling_ || execute_model_prof_on;
return execute_model_prof_on;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::PluginInit() const {
@ -744,5 +775,40 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ProfilingManager::CallMs
static_cast<void *>(&reporter_data), sizeof(ReporterData));
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpPoint(
std::string &fp_point, std::string &bp_point) {
// Env or options mode, fp_point_/bp_point_ have initiliazed on profiling init
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Bp Fp have been initialized in env or options");
fp_point = fp_point_;
bp_point = bp_point_;
GELOGI("Bp Fp have been initailized in env or options, bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str());
return;
}
// ProfApi mode and training trace is set
try {
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX);
if (ret != EN_OK) {
GELOGI("PROFILING_OPTIONS env is not exist.");
return;
}
GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options);
Json prof_options = Json::parse(env_profiling_options);
fp_point_ = prof_options[kFpPoint];
bp_point_ = prof_options[kBpPoint];
fp_point = fp_point_;
bp_point = bp_point_;
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
} catch (...) {
GELOGE(FAILED, "Json prof options is invalid.");
return ge::PARAM_INVALID;
}
}
} // namespace ge

@ -63,7 +63,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
bool ProfilingModelLoadOn() const { return is_load_profiling_; }
bool ProfilingModelExecuteOn() const;
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // only used by command pattern
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
@ -76,8 +76,10 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
struct MsprofCallback &GetMsprofCallback() { return prof_cb_; }
void SetMsprofCtrlCallback(MsprofCtrlCallback func) { prof_cb_.msprofCtrlCallback = func; }
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
void GetFpBpPoint(std::string &fp_point, std::string &bp_point);
private:
Status InitFromOptions(const Options &options, MsprofGeOptions &prof_conf);
Status ParseOptions(const std::string &options);
Status ProfParseParam(const std::map<std::string, std::string> &config_para, int32_t &device_num,
vector<int32_t> &device_list);
Status ProfParseDeviceId(const std::map<std::string, std::string> &config_para,
@ -96,6 +98,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
uint32_t subscribe_count_;
std::mutex mutex_;
MsprofCallback prof_cb_;
std::string fp_point_;
std::string bp_point_;
};
} // namespace ge
#endif // GE_COMMON_PROFILING_PROFILING_MANAGER_H_

@ -49,8 +49,6 @@ const char *const kIsLastNode = "is_last_node";
const char *const kIsInputVar = "INPUT_IS_VAR";
const char *const kIsOutputVar = "OUTPUT_IS_VAR";
const char *const kProfilingMode = "PROFILING_MODE";
const char *const kProfilingFpPoint = "FP_POINT";
const char *const kProfilingBpPoint = "BP_POINT";
const uint32_t kProfilingArStep = 2;
const uint64_t kProfilingFpStartLogid = 1;
const uint64_t kProfilingBpEndLogid = 2;
@ -810,35 +808,23 @@ Status TaskGenerator::GetFpBpIndex(const ComputeGraphPtr &graph, ProfilingPoint
vector<uint32_t> &all_reduce_nodes, std::string &fp_point_str,
std::string &bp_point_str) const {
if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_FPPONIT_OPTIONS, fp_point_str) == SUCCESS &&
ge::GetContext().GetOption(OPTION_EXEC_PROFILING_BPPONIT_OPTIONS, bp_point_str) == SUCCESS &&
!fp_point_str.empty() && !bp_point_str.empty()) {
return SUCCESS;
}
ProfilingManager::Instance().GetFpBpPoint(fp_point_str, bp_point_str);
Status ret = SUCCESS;
const char *fp_point = std::getenv(kProfilingFpPoint);
if (fp_point == nullptr) {
if (fp_point_str.empty()) {
ret = AutoFindFpOpIndex(graph, profiling_point);
if (ret != SUCCESS) {
GELOGW("First forward profiling op_index not set and FindFpOpIndex failed.");
return FAILED;
}
} else {
fp_point_str = string(fp_point);
GELOGI("Get fp_point_str from env %s", fp_point_str.c_str());
}
const char *bp_point = std::getenv(kProfilingBpPoint);
if (bp_point == nullptr) {
if (bp_point_str.empty()) {
ret = AutoFindBpOpIndex(graph, profiling_point, all_reduce_nodes);
if (ret != SUCCESS) {
GELOGW("Last backward profiling op_index not set and FindBpOpIndex failed.");
return FAILED;
}
} else {
bp_point_str = string(bp_point);
GELOGI("Get bp_point_str from env %s", bp_point_str.c_str());
}
return SUCCESS;

Loading…
Cancel
Save