|
|
|
@ -114,9 +114,6 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
device_id_ = device_id;
|
|
|
|
|
// exp: export PROFILING_MODE=true
|
|
|
|
|
// export PROFILING_OPTIONS=training_trace
|
|
|
|
|
const char *prof_options_str = std::getenv("PROFILING_OPTIONS");
|
|
|
|
|
// register Framework to profiling
|
|
|
|
|
int result = Msprof::Engine::RegisterEngine("Framework", engine_0_.get());
|
|
|
|
|
if (result != 0) {
|
|
|
|
@ -124,53 +121,51 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (prof_options_str != nullptr) {
|
|
|
|
|
const string prof_options_str_tmp = prof_options_str;
|
|
|
|
|
vector<string> opts = Split(prof_options_str_tmp, ':');
|
|
|
|
|
if (!opts.empty()) {
|
|
|
|
|
// current one docker only use one device`
|
|
|
|
|
Json p_device;
|
|
|
|
|
|
|
|
|
|
// JOBID
|
|
|
|
|
auto job_id = GetJobId();
|
|
|
|
|
p_device["jobID"] = std::to_string(job_id);
|
|
|
|
|
|
|
|
|
|
// device_id
|
|
|
|
|
p_device["deviceID"] = std::to_string(device_id);
|
|
|
|
|
|
|
|
|
|
// features:'training_trace', 'task_trace' etc
|
|
|
|
|
Json features;
|
|
|
|
|
for (vector<string>::size_type i = 0; i < opts.size(); i++) {
|
|
|
|
|
Json f;
|
|
|
|
|
f["name"] = opts[i];
|
|
|
|
|
features[i] = f;
|
|
|
|
|
}
|
|
|
|
|
p_device["features"] = features;
|
|
|
|
|
|
|
|
|
|
// only one device, but sProfMgrStartUp API require for device list
|
|
|
|
|
Json devices;
|
|
|
|
|
devices[0] = p_device;
|
|
|
|
|
|
|
|
|
|
Json startCfg;
|
|
|
|
|
startCfg["startCfg"] = devices;
|
|
|
|
|
|
|
|
|
|
// convert json to string
|
|
|
|
|
std::stringstream ss;
|
|
|
|
|
ss << startCfg;
|
|
|
|
|
std::string cfg = ss.str();
|
|
|
|
|
|
|
|
|
|
MS_LOG(INFO) << "profiling config " << cfg;
|
|
|
|
|
|
|
|
|
|
// call profiling startup API
|
|
|
|
|
ProfMgrCfg prof_cfg = {cfg};
|
|
|
|
|
prof_handle_ = ProfMgrStartUp(&prof_cfg);
|
|
|
|
|
if (prof_handle_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Startup profiling failed.";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
auto context = MsContext::GetInstance();
|
|
|
|
|
MS_EXCEPTION_IF_NULL(context);
|
|
|
|
|
const string prof_options_str = context->profiling_options();
|
|
|
|
|
vector<string> opts = Split(prof_options_str, ':');
|
|
|
|
|
if (opts.empty()) {
|
|
|
|
|
MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!";
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// current one docker only use one device`
|
|
|
|
|
Json p_device;
|
|
|
|
|
// JOBID
|
|
|
|
|
auto job_id = GetJobId();
|
|
|
|
|
p_device["jobID"] = std::to_string(job_id);
|
|
|
|
|
// device_id
|
|
|
|
|
p_device["deviceID"] = std::to_string(device_id);
|
|
|
|
|
// features:'training_trace', 'task_trace' etc
|
|
|
|
|
Json features;
|
|
|
|
|
for (vector<string>::size_type i = 0; i < opts.size(); i++) {
|
|
|
|
|
Json f;
|
|
|
|
|
f["name"] = opts[i];
|
|
|
|
|
features[i] = f;
|
|
|
|
|
}
|
|
|
|
|
p_device["features"] = features;
|
|
|
|
|
// only one device, but sProfMgrStartUp API require for device list
|
|
|
|
|
Json devices;
|
|
|
|
|
devices[0] = p_device;
|
|
|
|
|
|
|
|
|
|
Json startCfg;
|
|
|
|
|
startCfg["startCfg"] = devices;
|
|
|
|
|
|
|
|
|
|
// convert json to string
|
|
|
|
|
std::stringstream ss;
|
|
|
|
|
ss << startCfg;
|
|
|
|
|
std::string cfg = ss.str();
|
|
|
|
|
|
|
|
|
|
MS_LOG(INFO) << "profiling config " << cfg;
|
|
|
|
|
|
|
|
|
|
// call profiling startup API
|
|
|
|
|
ProfMgrCfg prof_cfg = {cfg};
|
|
|
|
|
prof_handle_ = ProfMgrStartUp(&prof_cfg);
|
|
|
|
|
if (prof_handle_ == nullptr) {
|
|
|
|
|
MS_LOG(ERROR) << "Startup profiling failed.";
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|