/** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "ge/ge_prof.h" #include "ge/ge_api.h" #include "init/gelib.h" #include "common/debug/log.h" #include "framework/common/debug/ge_log.h" #include "common/profiling/profiling_manager.h" #include "graph/load/graph_loader.h" #include "toolchain/prof_acl_api.h" using std::map; using std::string; using std::vector; namespace { const uint32_t kMaxDeviceNum = 64; const uint32_t kDeviceListIndex = 3; const std::string kProfilingInit = "prof_init"; const std::string kProfilingFinalize = "prof_finalize"; const std::string kProfilingStart = "prof_start"; const std::string kProfilingStop = "prof_stop"; const std::string kDeviceNums = "devNums"; const std::string kDeviceIdList = "devIdList"; const std::string kAicoreMetrics = "aicoreMetrics"; const std::map kProfAicoreMetricsToString = { {ge::kAicoreArithmaticThroughput, "AICORE_ARITHMATIC_THROUGHPUT"}, {ge::kAicorePipeline, "AICORE_PIPELINE"}, {ge::kAicoreSynchronization, "AICORE_SYNCHRONIZATION"}, {ge::kAicoreMemory, "AICORE_MEMORY"}, {ge::kAicoreInternalMemory, "AICORE_INTERNAL_MEMORY"}, {ge::kAicoreStall, "AICORE_STALL"}}; } // namespace static bool g_graph_prof_init_ = false; static std::mutex g_prof_mutex_; namespace ge { struct aclgrphProfConfig { ProfConfig config; }; Status aclgrphProfInit(const char *profiler_path, uint32_t length) { GELOGT(TRACE_INIT, "Graph prof init start"); std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); return FAILED; } std::lock_guard lock(g_prof_mutex_); if (g_graph_prof_init_) { GELOGW("Multi graph profiling initializations."); return GE_PROF_MULTI_INIT; } Status ret = CheckPath(profiler_path, length); if (ret != SUCCESS) { GELOGE(ret, "Profiling config path is invalid."); return ret; } // if command mode is set, just return if (ProfilingManager::Instance().ProfilingOn()) { GELOGW("Graph prof init failed, cause profiling command pattern is running."); return GE_PROF_MODE_CONFLICT; } ret = ProfInit(profiler_path); if (ret != SUCCESS) { GELOGE(ret, "ProfInit init fail"); return ret; } GraphLoader graph_loader; Command command; command.cmd_params.clear(); command.cmd_type = kProfilingInit; command.module_index = PROF_MODEL_LOAD; ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command %s failed, config = %s", kProfilingInit.c_str(), profiler_path); return ret; } if (!g_graph_prof_init_) { g_graph_prof_init_ = true; GELOGI("Profiling init successfully."); } GELOGI("Successfully execute GraphProfInit."); return SUCCESS; } Status aclgrphProfFinalize() { std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); return FAILED; } std::lock_guard lock(g_prof_mutex_); // if command mode is set, just return if (ProfilingManager::Instance().ProfilingOn()) { GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); return GE_PROF_MODE_CONFLICT; } if (!g_graph_prof_init_) { GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); return GE_PROF_NOT_INIT; } GraphLoader graph_loader; Command command; command.cmd_params.clear(); command.cmd_type = kProfilingFinalize; Status ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command %s failed.", kProfilingFinalize.c_str()); return ret; } ret = ProfFinalize(); if (ret != SUCCESS) { GELOGE(ret, "Finalize profiling failed, result = %d", ret); } if (ret == SUCCESS) { g_graph_prof_init_ = false; GELOGI("Successfully execute GraphProfFinalize."); } return ret; } bool TransProfConfigToParam(const aclgrphProfConfig *profiler_config, vector &prof_config_params) { prof_config_params.clear(); prof_config_params.emplace_back(kDeviceNums); prof_config_params.emplace_back(std::to_string(profiler_config->config.devNums)); prof_config_params.emplace_back(kDeviceIdList); std::string devID = ""; if (profiler_config->config.devNums == 0) { GELOGW("The device num is invalid."); return false; } for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { devID.append(std::to_string(profiler_config->config.devIdList[i])); if (i != profiler_config->config.devNums - 1) { devID.append(","); } } prof_config_params.push_back(devID); prof_config_params.push_back(kAicoreMetrics); auto iter = kProfAicoreMetricsToString.find(static_cast(profiler_config->config.aicoreMetrics)); if (iter == kProfAicoreMetricsToString.end()) { GELOGW("The prof aicore metrics is invalid."); return false; } prof_config_params.push_back(iter->second); return true; } bool isProfConfigValid(const uint32_t *deviceid_list, uint32_t device_nums) { if (deviceid_list == nullptr) { GELOGE(PARAM_INVALID, "deviceIdList is nullptr"); return false; } if (device_nums == 0 || device_nums > kMaxDeviceNum) { GELOGE(PARAM_INVALID, "The device nums is invalid."); return false; } // real device num int32_t dev_count = 0; rtError_t rt_err = rtGetDeviceCount(&dev_count); if (rt_err != RT_ERROR_NONE) { GELOGE(INTERNAL_ERROR, "Get the Device count fail."); return false; } if (device_nums > static_cast(dev_count)) { GELOGE(PARAM_INVALID, "Device num(%u) is not in range 1 ~ %d.", device_nums, dev_count); return false; } std::unordered_set record; for (size_t i = 0; i < device_nums; ++i) { uint32_t dev_id = deviceid_list[i]; if (dev_id >= static_cast(dev_count)) { GELOGE(PARAM_INVALID, "Device id %u is not in range 0 ~ %d(exclude %d)", dev_id, dev_count, dev_count); return false; } if (record.count(dev_id) > 0) { GELOGE(PARAM_INVALID, "Device id %u is duplicatedly set", dev_id); return false; } record.insert(dev_id); } return true; } aclgrphProfConfig *aclgrphProfCreateConfig(uint32_t *deviceid_list, uint32_t device_nums, ProfilingAicoreMetrics aicore_metrics, ProfAicoreEvents *aicore_events, uint64_t data_type_config) { if (!isProfConfigValid(deviceid_list, device_nums)) { return nullptr; } aclgrphProfConfig *config = new (std::nothrow) aclgrphProfConfig(); if (config == nullptr) { GELOGE(INTERNAL_ERROR, "new aclgrphProfConfig fail"); return nullptr; } config->config.devNums = device_nums; if (memcpy_s(config->config.devIdList, sizeof(config->config.devIdList), deviceid_list, device_nums * sizeof(uint32_t)) != EOK) { GELOGE(INTERNAL_ERROR, "copy devID failed. size = %u", device_nums); delete config; return nullptr; } config->config.aicoreMetrics = static_cast(aicore_metrics); config->config.dataTypeConfig = data_type_config; GELOGI("Successfully create prof config."); return config; } Status aclgrphProfDestroyConfig(aclgrphProfConfig *profiler_config) { if (profiler_config == nullptr) { GELOGE(PARAM_INVALID, "destroy profilerConfig failed, profilerConfig must not be nullptr"); return PARAM_INVALID; } delete profiler_config; GELOGI("Successfully destroy prof config."); return SUCCESS; } Status aclgrphProfStart(aclgrphProfConfig *profiler_config) { if (profiler_config == nullptr) { GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); return FAILED; } std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); return FAILED; } std::lock_guard lock(g_prof_mutex_); // if command mode is set, just return if (ProfilingManager::Instance().ProfilingOn()) { GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); return GE_PROF_MODE_CONFLICT; } if (!g_graph_prof_init_) { GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); return GE_PROF_NOT_INIT; } Status ret = ProfStartProfiling(&profiler_config->config); if (ret != SUCCESS) { GELOGE(ret, "Start profiling failed, prof result = %d", ret); return FAILED; } std::vector prof_params; if (!TransProfConfigToParam(profiler_config, prof_params)) { GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); return PARAM_INVALID; } GraphLoader graph_loader; Command command; command.cmd_params.clear(); command.cmd_type = kProfilingStart; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; GELOGI("Profiling will start, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str(), command.module_index); ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); return FAILED; } GELOGI("Successfully execute GraphProfStartProfiling."); return SUCCESS; } Status aclgrphProfStop(aclgrphProfConfig *profiler_config) { if (profiler_config == nullptr) { GELOGE(PARAM_INVALID, "aclgrphProfConfig is invalid."); return FAILED; } std::shared_ptr instance_ptr = ge::GELib::GetInstance(); if (instance_ptr == nullptr || !instance_ptr->InitFlag()) { GELOGE(GE_CLI_GE_NOT_INITIALIZED, "Ge client is not initialized."); return FAILED; } std::lock_guard lock(g_prof_mutex_); // if command mode is set, just return if (ProfilingManager::Instance().ProfilingOn()) { GELOGW("Graph prof finalize failed, cause profiling command pattern is running."); return GE_PROF_MODE_CONFLICT; } if (!g_graph_prof_init_) { GELOGE(GE_PROF_NOT_INIT, "Graph not profiling initialize."); return GE_PROF_NOT_INIT; } for (uint32_t i = 0; i < profiler_config->config.devNums; i++) { uint64_t data_type_config; Status status = ProfGetDataTypeConfig(profiler_config->config.devIdList[i], data_type_config); if (status != SUCCESS) { GELOGE(status, "Prof get data type config failed, prof result = %d", status); return status; } if (data_type_config != profiler_config->config.dataTypeConfig) { GELOGE(FAILED, "data type config verify failed"); return FAILED; } } std::vector prof_params; if (!TransProfConfigToParam(profiler_config, prof_params)) { GELOGE(PARAM_INVALID, "Transfer profilerConfig to string vector failed"); return PARAM_INVALID; } GraphLoader graph_loader; Command command; command.cmd_params.clear(); command.cmd_type = kProfilingStop; command.cmd_params = prof_params; command.module_index = profiler_config->config.dataTypeConfig; GELOGI("Profiling will stop, device nums:%s , deviceID:[%s], data type config: 0x%llx", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str(), command.module_index); Status ret = graph_loader.CommandHandle(command); if (ret != SUCCESS) { GELOGE(ret, "Handle profiling command failed"); return FAILED; } ret = ProfStopProfiling(&profiler_config->config); if (ret != SUCCESS) { GELOGE(ret, "Stop profiling failed, prof result = %d", ret); return ret; } GELOGI("Successfully execute GraphProfStopProfiling."); return SUCCESS; } } // namespace ge