/** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "graph/load/new_model_manager/model_manager.h" #include #include "common/dump/dump_manager.h" #include "common/l2_cache_optimize.h" #include "common/profiling/profiling_manager.h" #include "common/properties_manager.h" #include "framework/common/debug/ge_log.h" #include "framework/common/util.h" #include "graph/common/ge_call_wrapper.h" #include "graph/debug/ge_attr_define.h" #include "graph/load/new_model_manager/davinci_model.h" #include "graph/load/new_model_manager/davinci_model_parser.h" #include "model/ge_root_model.h" #include "graph/common/local_context.h" #include "common/formats/utils/formats_trans_utils.h" #include "hybrid/hybrid_davinci_model.h" namespace ge { thread_local uint32_t device_count = 0; namespace { const int kCmdParSize = 2; const int kDumpCmdPairSize = 2; const std::size_t kProfCmdParaMaxSize = 1000; const std::size_t kProfStartCmdParaSize = 2; const std::string kCmdTypeProfile = "profile"; const std::string kCmdTypeDump = "dump"; const std::string kCmdTypeProfiling = "profiling"; const std::string kCmdTypeProfInit = "prof_init"; const std::string kCmdTypeProfFinalize = "prof_finalize"; const std::string kCmdTypeProfStart = "prof_start"; const std::string kCmdTypeProfStop = "prof_stop"; const std::string kCmdTypeProfModelSubscribe = "prof_model_subscribe"; const std::string kCmdTypeProfModelUnsubscribe = "prof_model_cancel_subscribe"; const char 
*const kBatchLoadBuf = "batchLoadsoFrombuf"; const char *const kDeleteCustOp = "deleteCustOp"; struct CustAicpuSoBuf { uint64_t kernelSoBuf; uint32_t kernelSoBufLen; uint64_t kernelSoName; uint32_t kernelSoNameLen; } __attribute__((packed)); struct BatchLoadOpFromBufArgs { uint32_t soNum; uint64_t args; } __attribute__((packed)); } // namespace DumpProperties ModelManager::dump_properties_; std::mutex ModelManager::exeception_infos_mutex_; std::shared_ptr ModelManager::GetInstance() { static const std::shared_ptr instance_ptr = shared_ptr(new (std::nothrow) ModelManager(), ModelManager::FinalizeForPtr); return instance_ptr; } ModelManager::ModelManager() { max_model_id_ = 0; session_id_bias_ = 0; } Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, uint64_t session_id, uint32_t model_id) { STR_FWK_OP_KERNEL param_base = {}; void *devicebase = nullptr; void *aicpu_kernel_addr = nullptr; const uint32_t kKernelType = 0; param_base.fwkKernelType = kKernelType; param_base.fwkKernelBase.fwk_kernel.opType = op_type; param_base.fwkKernelBase.fwk_kernel.sessionID = session_id; if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) { std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); auto iter = model_aicpu_kernel_.find(model_key); if (iter != model_aicpu_kernel_.end()) { GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id); v_aicpu_kernel = model_aicpu_kernel_.at(model_key); // Insert size of aicpu kernel vector in the first element v_aicpu_kernel.insert(v_aicpu_kernel.begin(), v_aicpu_kernel.size()); auto kernel_size = sizeof(uint64_t) * (v_aicpu_kernel.size()); rtError_t rt_ret = rtMalloc(&aicpu_kernel_addr, kernel_size, RT_MEMORY_HBM); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret);) rt_ret = rtMemcpy(aicpu_kernel_addr, kernel_size, v_aicpu_kernel.data(), 
kernel_size, RT_MEMCPY_HOST_TO_DEVICE); GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret); GE_CHK_RT(rtFree(aicpu_kernel_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);) uint64_t kernel_id_addr = static_cast(reinterpret_cast(aicpu_kernel_addr)); param_base.fwkKernelBase.fwk_kernel.kernelID = kernel_id_addr; // In the scene of loading once and running many times, the kernel needs to be destroyed many times, // and connot be removed from kernel map. } } rtError_t rt_ret = rtMalloc(&(devicebase), sizeof(STR_FWK_OP_KERNEL), RT_MEMORY_HBM); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "malloc device memory failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtMemcpy(devicebase, sizeof(STR_FWK_OP_KERNEL), ¶m_base, sizeof(STR_FWK_OP_KERNEL), RT_MEMCPY_HOST_TO_DEVICE); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "memory copy to device failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); return RT_ERROR_TO_GE_STATUS(rt_ret); } rtStream_t stream = nullptr; rt_ret = rtStreamCreate(&stream, 0); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "create stream failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtKernelLaunchEx(devicebase, sizeof(STR_FWK_OP_KERNEL), 0, stream); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtKernelLaunchEx failed. 
ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtStreamSynchronize(stream); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtStreamSynchronize failed. ret: 0x%X", rt_ret); GE_IF_BOOL_EXEC(aicpu_kernel_addr != nullptr, GE_CHK_RT(rtFree(aicpu_kernel_addr))); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(rt_ret); } if (aicpu_kernel_addr != nullptr) { rt_ret = rtFree(aicpu_kernel_addr); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); GE_CHK_RT(rtFree(devicebase)); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(rt_ret); } } rt_ret = rtFree(devicebase); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "free memory failed. ret: 0x%X", rt_ret); GE_CHK_RT(rtStreamDestroy(stream)); return RT_ERROR_TO_GE_STATUS(rt_ret); } rt_ret = rtStreamDestroy(stream); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtStreamDestroy failed. 
ret: 0x%X", rt_ret); return RT_ERROR_TO_GE_STATUS(rt_ret); } return SUCCESS; } void ModelManager::DestroyAicpuSession(uint64_t session_id) { std::lock_guard lock(sess_ids_mutex_); auto it = sess_ids_.find(session_id); if (it == sess_ids_.end()) { GELOGI("The session: %lu not created.", session_id); return; } else { rtContext_t ctx = nullptr; bool has_ctx = (rtCtxGetCurrent(&ctx) == RT_ERROR_NONE); if (!has_ctx) { GELOGI("Set device %u.", GetContext().DeviceId()); GE_CHK_RT(rtSetDevice(static_cast(GetContext().DeviceId()))); } Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_DESTROY, session_id, 0); if (ret != SUCCESS) { GELOGW("The session: %lu destroy failed.", session_id); } else { (void)sess_ids_.erase(session_id); GELOGI("The session: %lu destroyed.", session_id); } if (!has_ctx) { GELOGI("Reset device %u.", GetContext().DeviceId()); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); } } } ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) { std::lock_guard lock(map_mutex_); auto hybrid_davinci_model = hybrid_model_map_.find(model_id); if (hybrid_davinci_model != hybrid_model_map_.end()) { uint64_t session_id = hybrid_davinci_model->second->GetSessionId(); DestroyAicpuSession(session_id); return SUCCESS; } auto it = model_map_.find(model_id); if (it == model_map_.end()) { GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id); return GE_EXEC_MODEL_ID_INVALID; } uint64_t session_id = it->second->GetSessionId(); DestroyAicpuSession(session_id); return SUCCESS; } ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) { GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id); std::lock_guard lock(sess_ids_mutex_); std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { Status ret = 
KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id); if (ret != SUCCESS) { GELOGE(FAILED, "Destroy aicpu kernel failed."); return FAILED; } } return SUCCESS; } ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) { std::lock_guard lock(sess_ids_mutex_); std::vector v_aicpu_kernel; std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id); if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) { v_aicpu_kernel = model_aicpu_kernel_.at(model_key); } v_aicpu_kernel.push_back(kernel_id); model_aicpu_kernel_[model_key] = v_aicpu_kernel; return SUCCESS; } ModelManager::~ModelManager() { std::lock_guard lock(map_mutex_); model_map_.clear(); model_aicpu_kernel_.clear(); cust_aicpu_so_.clear(); GE_IF_BOOL_EXEC(device_count > 0, GE_CHK_RT(rtDeviceReset(0))); } /// /// @ingroup domi_ome /// @brief set Device. If no device available, return failure /// @return Status run result /// @author /// Status ModelManager::SetDevice(int32_t deviceId) const { GE_CHK_RT_RET(rtSetDevice(deviceId)); return SUCCESS; } ge::Status ModelManager::SetDynamicSize(uint32_t model_id, const std::vector &batch_num, int32_t dynamic_type) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); davinci_model->SetDynamicSize(batch_num, dynamic_type); return SUCCESS; } ge::Status ModelManager::DoLoadHybridModelOnline(uint32_t model_id, const shared_ptr &ge_root_model, const shared_ptr &listener) { auto hybrid_model = hybrid::HybridDavinciModel::Create(ge_root_model); GE_CHECK_NOTNULL(hybrid_model); hybrid_model->SetListener(listener); hybrid_model->SetModelId(model_id); hybrid_model->SetDeviceId(GetContext().DeviceId()); GE_CHK_STATUS_RET(hybrid_model->Init(), "Failed to init hybrid model. 
model_id = %u", model_id); auto shared_model = std::shared_ptr(hybrid_model.release()); InsertModel(model_id, shared_model); return SUCCESS; } /// /// @ingroup domi_ome /// @brief load model online /// @return Status run result /// Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr &ge_root_model, std::shared_ptr listener) { GE_CHK_BOOL_RET_STATUS(listener.get() != nullptr, PARAM_INVALID, "Param incorrect, listener is null"); if (model_id == INVALID_MODEL_ID) { GenModelId(&model_id); } bool is_shape_unknown = false; GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u", model_id); if (is_shape_unknown || GetContext().GetHostExecFlag()) { return DoLoadHybridModelOnline(model_id, ge_root_model, listener); } GE_CHK_STATUS_RET(SetDevice(static_cast(GetContext().DeviceId())), "Set device failed, model id:%u.", model_id); mmTimespec timespec = mmGetTickCount(); std::shared_ptr davinci_model = MakeShared(0, listener); if (davinci_model == nullptr) { GELOGE(FAILED, "davinci_model is nullptr"); return FAILED; } davinci_model->SetId(model_id); davinci_model->SetDeviceId(GetContext().DeviceId()); const DumpProperties &dump_properties = PropertiesManager::Instance().GetDumpProperties(GetContext().SessionId()); davinci_model->SetDumpProperties(dump_properties); dump_properties_ = dump_properties; auto root_graph = ge_root_model->GetRootGraph(); GE_CHECK_NOTNULL(root_graph); string root_model_name = root_graph->GetName(); auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel(); GeModelPtr ge_model = name_to_model[root_model_name]; Status ret = SUCCESS; do { GE_TIMESTAMP_START(Assign); GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Assign(ge_model)), GELOGW("assign model to modeldef failed."); break;); GE_TIMESTAMP_END(Assign, "GraphLoader::ModelAssign"); GE_TIMESTAMP_START(Init); GE_IF_BOOL_EXEC(SUCCESS != (ret = davinci_model->Init()), GELOGW("DavinciInit failed."); break;); 
GE_TIMESTAMP_END(Init, "GraphLoader::ModelInit"); InsertModel(model_id, davinci_model); GELOGI("Parse model %u success.", model_id); davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); } while (0); GE_CHK_RT(rtDeviceReset(static_cast(GetContext().DeviceId()))); return ret; } void ModelManager::InsertModel(uint32_t id, std::shared_ptr &davinci_model) { GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id); std::lock_guard lock(map_mutex_); model_map_[id] = davinci_model; } void ModelManager::InsertModel(uint32_t id, shared_ptr &hybrid_model) { GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id); std::lock_guard lock(map_mutex_); hybrid_model_map_[id] = hybrid_model; } Status ModelManager::DeleteModel(uint32_t id) { std::lock_guard lock(map_mutex_); auto it = model_map_.find(id); auto hybrid_model_it = hybrid_model_map_.find(id); if (it != model_map_.end()) { uint64_t session_id = it->second->GetSessionId(); std::string model_key = std::to_string(session_id) + "_" + std::to_string(id); auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key); if (iter_aicpu_kernel != model_aicpu_kernel_.end()) { (void)model_aicpu_kernel_.erase(iter_aicpu_kernel); } (void)model_map_.erase(it); } else if (hybrid_model_it != hybrid_model_map_.end()) { (void)hybrid_model_map_.erase(hybrid_model_it); } else { GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id); return GE_EXEC_MODEL_ID_INVALID; } return SUCCESS; } std::shared_ptr ModelManager::GetModel(uint32_t id) { std::lock_guard lock(map_mutex_); auto it = model_map_.find(id); return (it == model_map_.end()) ? 
nullptr : it->second; } std::shared_ptr ModelManager::GetHybridModel(uint32_t id) { std::lock_guard lock(map_mutex_); auto it = hybrid_model_map_.find(id); return (it == hybrid_model_map_.end()) ? nullptr : it->second; } Status ModelManager::Unload(uint32_t model_id) { GE_CHK_STATUS_RET(DeleteModel(model_id), "failed to unload model id: %u", model_id); if (device_count > 0) { device_count--; GELOGI("Unload model %u success.", model_id); } else { GELOGI("Unload model %u success.no need reset device,device_count: %u", model_id, device_count); } std::lock_guard lock(exeception_infos_mutex_); exception_infos_.clear(); return SUCCESS; } Status ModelManager::UnloadModeldef(uint32_t model_id) { GE_CHK_STATUS_RET(DeleteModel(model_id), "failed to unload modeldef id: %u", model_id); return SUCCESS; } Status ModelManager::DataInput(const InputData &input_data, OutputData &output_data) { GELOGI("calling the DataInput"); shared_ptr data_wrap(new (std::nothrow) InputDataWrapper()); GE_CHECK_NOTNULL(data_wrap); Status status = data_wrap->Init(input_data, output_data); if (status != SUCCESS) { GELOGE(domi::PUSH_DATA_FAILED, "Init InputDataWrapper failed, input data index: %u.", input_data.index); return domi::PUSH_DATA_FAILED; } uint32_t model_id = input_data.model_id; output_data.model_id = model_id; std::shared_ptr model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! 
", model_id); GE_IF_BOOL_EXEC(model->GetDataInputTid() == 0, model->SetDataInputTid(mmGetTid())); DataInputer *inputer = model->GetDataInputer(); GE_CHECK_NOTNULL(inputer); if (inputer->Push(data_wrap) != SUCCESS) { GELOGE(domi::DATA_QUEUE_ISFULL, "Data queue is full, please call again later, model_id %u ", model_id); return domi::DATA_QUEUE_ISFULL; } GELOGD("Data input success, model id:%u", model_id); return SUCCESS; } Status ModelManager::GetCurDynamicDims(const vector> &user_real_input_dims, const vector>> &user_input_dims, vector &cur_dynamic_dims) { GELOGD(" Start get cur dynamic dims."); if (user_real_input_dims.size() != user_input_dims.size()) { GELOGE(INTERNAL_ERROR, "The input count of user: %zu should be equal to the data count of graph: %zu", user_real_input_dims.size(), user_input_dims.size()); return INTERNAL_ERROR; } for (size_t i = 0; i < user_input_dims.size(); ++i) { if (user_real_input_dims[i].size() != user_input_dims[i].second.size()) { GELOGE(INTERNAL_ERROR, "The shape size: %zu of dynamic input: %s should be equal to the shape size of input shape: %zu.", user_real_input_dims[i].size(), user_input_dims[i].first.c_str(), user_input_dims[i].second.size()); return INTERNAL_ERROR; } for (size_t j = 0; j < user_input_dims.at(i).second.size(); ++j) { if (user_input_dims.at(i).second.at(j) < 0) { cur_dynamic_dims.emplace_back(user_real_input_dims[i][j]); } } } GELOGD("Cur dynamic dims is %s.", formats::JoinToString(cur_dynamic_dims).c_str()); bool cur_dynamic_dims_valid = false; std::vector shape_strs = ge::StringUtils::Split(GetLocalOmgContext().dynamic_dims, ';'); for (auto dynamic_dim : shape_strs) { if (dynamic_dim == formats::JoinToString(cur_dynamic_dims)) { cur_dynamic_dims_valid = true; break; } } if (!cur_dynamic_dims_valid) { GELOGE(INTERNAL_ERROR, "Cur dynamic dims is %s, not exist in options.", formats::JoinToString(cur_dynamic_dims).c_str()); return INTERNAL_ERROR; } return SUCCESS; } /// /// @ingroup domi_ome /// @brief load Input and 
output TensorInfo for Model /// @return Status run result /// Status ModelManager::DataInputTensor(uint32_t model_id, const std::vector &inputs) { std::shared_ptr model = GetModel(model_id); auto hybrid_model = GetHybridModel(model_id); if (hybrid_model == nullptr) { GE_CHECK_NOTNULL(model); } InputData input_data; input_data.model_id = model_id; input_data.timeout = 0; input_data.timestamp = 0; input_data.index = 0; for (size_t i = 0; i < inputs.size(); ++i) { DataBuffer data; data.data = inputs[i].data; data.length = inputs[i].length; input_data.blobs.push_back(data); } if (!GetLocalOmgContext().user_input_dims.empty() && GetLocalOmgContext().need_multi_batch) { std::vector cur_dynamic_dims; if (!GetLocalOmgContext().user_real_input_dims.empty()) { if (GetCurDynamicDims(GetLocalOmgContext().user_real_input_dims, GetLocalOmgContext().user_input_dims, cur_dynamic_dims) != SUCCESS) { GELOGE(INTERNAL_ERROR, "[Train_Dynamic] Failed to Parse real_dynamic_dims."); return INTERNAL_ERROR; } DataBuffer data; data.data = new(std::nothrow) int64_t[cur_dynamic_dims.size()]; GE_CHECK_NOTNULL(data.data); uint64_t length = static_cast(cur_dynamic_dims.size() * sizeof(int64_t)); GE_CHK_BOOL_EXEC(memcpy_s(data.data, length, cur_dynamic_dims.data(), length) == EOK, return INTERNAL_ERROR, "Failed to memcpy data."); data.length = length; input_data.blobs.push_back(data); } } OutputData output_data; output_data.model_id = model_id; output_data.index = 0; shared_ptr data_wrap(new (std::nothrow) InputDataWrapper()); GE_CHECK_NOTNULL(data_wrap); GE_CHK_STATUS_EXEC(data_wrap->Init(input_data, output_data), return domi::PUSH_DATA_FAILED, "Init InputDataWrapper failed,input data model_id is : %u.", model_id); if (hybrid_model != nullptr) { GE_CHK_STATUS_RET(hybrid_model->EnqueueData(data_wrap), "Data queue is full, please call again later, model_id %u ", model_id); return SUCCESS; } GE_CHK_BOOL_RET_STATUS(model != nullptr, PARAM_INVALID, "Invalid model id %u in InputData! 
", model_id); DataInputer *inputer = model->GetDataInputer(); GE_CHECK_NOTNULL(inputer); GE_CHK_STATUS_EXEC(inputer->Push(data_wrap), return domi::DATA_QUEUE_ISFULL, "Data queue is full, please call again later, model_id %u ", model_id); GELOGD("Data input success, model id:%u", model_id); return SUCCESS; } /// /// @ingroup domi_ome /// @brief create model thread, start to execute model /// @param [in] model_id Model ID to be started /// @return Status model run result /// @author /// Status ModelManager::Start(uint32_t model_id) { auto hybrid_model = GetHybridModel(model_id); if (hybrid_model != nullptr) { GE_CHK_STATUS_RET_NOLOG(hybrid_model->ModelRunStart()); GELOGI("Start hybrid model %u success.", model_id); return SUCCESS; } std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to start! ", model_id); Status status = davinci_model->ModelRunStart(); if (status == SUCCESS) { GELOGI("Start model %u success.", model_id); } return status; } /// /// @ingroup domi_ome /// @brief Model ID stop /// @only when unloaded /// @param [in] model_id Model ID to be stopped /// @return Status model stop result /// @author /// Status ModelManager::Stop(uint32_t model_id) { auto hybrid_model = GetHybridModel(model_id); if (hybrid_model != nullptr) { GE_CHK_STATUS_RET_NOLOG(hybrid_model->ModelRunStop()); GELOGI("Stop hybrid model %u success.", model_id); return SUCCESS; } std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u to stop!", model_id); Status status = davinci_model->ModelRunStop(); if (status == SUCCESS) { GELOGI("Stop model %u success.", model_id); } return status; } /// /// @ingroup domi_ome /// @brief Command handle /// @iterator 1 only Ieference, Debug 2 modes /// @param [in] command command to handle /// @return Status command handle result /// @author /// Status ModelManager::HandleCommand(const 
Command &command) {
  // Dispatch table from command type to its handler.
  // NOTE(review): template argument lists (on map, shared_ptr, static_cast, ...) appear
  // to have been stripped from this copy of the file. They are left exactly as found
  // here; restore them from the upstream source before building.
  static const std::map> cmds = {
      {kCmdTypeProfile, HandleProfileCommand},
      {kCmdTypeDump, HandleDumpCommand},
      {kCmdTypeProfiling, HandleAclProfilingCommand},
      {kCmdTypeProfInit, HandleProfInitCommand},
      {kCmdTypeProfFinalize, HandleProfFinalizeCommand},
      {kCmdTypeProfStart, HandleProfStartCommand},
      {kCmdTypeProfStop, HandleProfStopCommand},
      {kCmdTypeProfModelSubscribe, HandleProfModelSubscribeCommand},
      {kCmdTypeProfModelUnsubscribe, HandleProfModelUnsubscribeCommand}};

  const auto handler = cmds.find(command.cmd_type);
  if (handler != cmds.end()) {
    return handler->second(command);
  }
  GELOGE(PARAM_INVALID, "Unsupported command: %s", command.cmd_type.c_str());
  return PARAM_INVALID;
}

/// @brief Handle the "profiling" command: when the first parameter is PROFILE_CONFIG,
/// the second one is passed to the profiling manager as its configuration.
/// @return SUCCESS, or PARAM_INVALID when fewer than two parameters are given
Status ModelManager::HandleAclProfilingCommand(const Command &command) {
  if (command.cmd_params.size() < kCmdParSize) {
    GELOGE(PARAM_INVALID, "When the cmd_type is 'profiling', the size of cmd_params must larger than 2.");
    return PARAM_INVALID;
  }

  const std::string key = command.cmd_params[0];
  const std::string config = command.cmd_params[1];
  if (key == PROFILE_CONFIG) {
    ProfilingManager::Instance().SetProfilingConfig(config);
  }

  return SUCCESS;
}

/// @brief Resolve the model addressed by a command of the form {PROFILE_MODEL_ID, "<id>"}.
/// @param [in]  command       command whose first two parameters name the model
/// @param [out] davinci_model receives the loaded model on success
/// @return SUCCESS; PARAM_INVALID for too few parameters or an unparsable id; FAILED when
///         the key is not PROFILE_MODEL_ID, the manager is unavailable-checked by
///         GE_CHECK_NOTNULL, or no such model is loaded
Status ModelManager::GetModelByCmd(const Command &command,
                                   std::shared_ptr &davinci_model) {
  if (command.cmd_params.size() < kCmdParSize) {
    GELOGE(PARAM_INVALID, "When the cmd_type is '%s', the size of cmd_params must larger than 2.",
           command.cmd_type.c_str());
    return PARAM_INVALID;
  }

  const std::string key = command.cmd_params[0];
  const std::string id_text = command.cmd_params[1];
  if (key != PROFILE_MODEL_ID) {
    GELOGE(FAILED, "The model_id parameter is not found in the command.");
    return FAILED;
  }

  int32_t model_id = 0;
  try {
    model_id = std::stoi(id_text);
  } catch (std::invalid_argument &) {
    GELOGE(PARAM_INVALID, "Model id: %s is invalid.", id_text.c_str());
    return PARAM_INVALID;
  } catch (std::out_of_range &) {
    GELOGE(PARAM_INVALID, "Model id: %s is out of range.", id_text.c_str());
    return PARAM_INVALID;
  } catch (...) {
    GELOGE(FAILED, "Model id: %s cannot change to int.", id_text.c_str());
    return FAILED;
  }

  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
  davinci_model = model_manager->GetModel(static_cast(model_id));
  if (davinci_model == nullptr) {
    GELOGE(FAILED, "Model id: %d is invaild or model is not loaded.", model_id);
    return FAILED;
  }

  return SUCCESS;
}

/// @brief Subscribe the model named in the command to profiling for command.module_index.
Status ModelManager::HandleProfModelSubscribeCommand(const Command &command) {
  std::shared_ptr davinci_model = nullptr;
  const Status lookup = GetModelByCmd(command, davinci_model);
  if (lookup != SUCCESS) {
    return lookup;
  }

  const auto subscribed =
      ProfilingManager::Instance().ProfModelSubscribe(command.module_index, static_cast(davinci_model.get()));
  if (subscribed != SUCCESS) {
    GELOGE(FAILED, "Handle prof model subscribe failed.");
    return FAILED;
  }

  return SUCCESS;
}

/// @brief Cancel the profiling subscription of the model named in the command.
Status ModelManager::HandleProfModelUnsubscribeCommand(const Command &command) {
  std::shared_ptr davinci_model = nullptr;
  const Status lookup = GetModelByCmd(command, davinci_model);
  if (lookup != SUCCESS) {
    return lookup;
  }

  const auto unsubscribed = ProfilingManager::Instance().ProfModelUnsubscribe(static_cast(davinci_model.get()));
  if (unsubscribed != SUCCESS) {
    GELOGE(FAILED, "Handle prof model unsubscribe failed.");
    return FAILED;
  }

  return SUCCESS;
}

/// @brief Initialise profiling for command.module_index.
Status ModelManager::HandleProfInitCommand(const Command &command) {
  const uint64_t module_index = command.module_index;
  if (ProfilingManager::Instance().ProfInit(module_index) == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(FAILED, "Handle prof init failed.");
  return FAILED;
}

/// @brief Finalise profiling; the command contents are not read.
Status ModelManager::HandleProfFinalizeCommand(const Command &command) {
  if (ProfilingManager::Instance().ProfFinalize() == SUCCESS) {
    return SUCCESS;
  }
  GELOGE(FAILED, "Handle prof finalize failed.");
  return FAILED;
}

/*
 * cmd para when prof start
 * "devNums:2"
 * "devIdList:1,2"
 * "profilingOption:PROF_OP_TRACE"
 * "aicoreMetrics:AICORE_ARITHMATIC_THROUGHPUT"
 */
Status ModelManager::HandleProfStartCommand(const Command &command) {
if (command.cmd_params.size() < kProfStartCmdParaSize) {
    GELOGE(PARAM_INVALID, "When the cmd_type is 'profile start', the size of cmd_params must larger than 2.");
    return PARAM_INVALID;
  }
  if (command.cmd_params.size() > kProfCmdParaMaxSize) {
    GELOGE(PARAM_INVALID, "Command para size[%zu] larger than max[1000].", command.cmd_params.size());
    return PARAM_INVALID;
  }

  // NOTE(review): template argument lists (on map, set, ...) appear to have been stripped
  // from this copy of the file. They are left exactly as found here; restore them from
  // the upstream source before building.
  std::map cmd_params_map;
  // Parameters arrive as a flat key, value, key, value, ... list; a trailing key that
  // has no value is ignored.
  const size_t step = 2;
  for (size_t i = 0; i + 1 < command.cmd_params.size(); i += step) {
    cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1];
  }
  uint64_t module_index = command.module_index;
  if (ProfilingManager::Instance().ProfStartProfiling(module_index, cmd_params_map) != SUCCESS) {
    GELOGE(FAILED, "Handle prof start failed.");
    return FAILED;
  }
  return SUCCESS;
}

/// @brief Handle the "prof_stop" command: stop profiling for command.module_index with
/// the key/value options carried in cmd_params.
/// @return SUCCESS; PARAM_INVALID when the parameter count is below 2 or above 1000;
///         FAILED when the profiling manager rejects the request
Status ModelManager::HandleProfStopCommand(const Command &command) {
  if (command.cmd_params.size() < kProfStartCmdParaSize) {
    GELOGE(PARAM_INVALID, "When the cmd_type is 'profile stop', the size of cmd_params must larger than 2.");
    return PARAM_INVALID;
  }
  if (command.cmd_params.size() > kProfCmdParaMaxSize) {
    GELOGE(PARAM_INVALID, "Command para size[%zu] larger than max[1000].", command.cmd_params.size());
    return PARAM_INVALID;
  }

  std::map cmd_params_map;
  // Parameters arrive as a flat key, value, key, value, ... list; a trailing key that
  // has no value is ignored.
  const size_t step = 2;
  for (size_t i = 0; i + 1 < command.cmd_params.size(); i += step) {
    cmd_params_map[command.cmd_params[i]] = command.cmd_params[i + 1];
  }
  uint64_t module_index = command.module_index;
  if (ProfilingManager::Instance().ProfStopProfiling(module_index, cmd_params_map) != SUCCESS) {
    // This message used to say "finalize", which pointed at the wrong handler.
    GELOGE(FAILED, "Handle prof stop failed.");
    return FAILED;
  }
  return SUCCESS;
}

/// @brief Handle the "profile" command given as {key, value}.
///
/// A key found in PROFILE_COMPONENT_MAP switches the mapped property to "1" when value is
/// "on" and to "0" otherwise. The keys PROFILER_JOBCTX, PROFILER_TARGET_PATH and
/// RTS_PROFILE_PATH store value under the key itself.
/// @return SUCCESS, or PARAM_INVALID when fewer than two parameters are given
Status ModelManager::HandleProfileCommand(const Command &command) {
  if (command.cmd_params.size() < kCmdParSize) {
    GELOGE(PARAM_INVALID, "When the cmd_type is 'profile', the size of cmd_params must larger than 2.");
    return PARAM_INVALID;
  }

  std::string map_key = command.cmd_params[0];
  std::string value = command.cmd_params[1];

  GELOGI("Profiling mode, Command key:%s , value:%s ", map_key.c_str(), value.c_str());

  auto iter = PROFILE_COMPONENT_MAP.find(map_key);
  if (iter != PROFILE_COMPONENT_MAP.end()) {
    std::string property_value = (value == "on") ? "1" : "0";
    PropertiesManager::Instance().SetPropertyValue(iter->second, property_value);
  }

  if ((map_key == PROFILER_JOBCTX || map_key == PROFILER_TARGET_PATH || map_key == RTS_PROFILE_PATH)) {
    PropertiesManager::Instance().SetPropertyValue(map_key, value);
  }

  return SUCCESS;
}

/// @brief Fetch the parameter that follows dump_key in command.cmd_params.
/// @param [in]     command    command whose parameters are searched
/// @param [in]     dump_key   key to look for
/// @param [in,out] dump_value overwritten with the following parameter when the key is
///                            present; left untouched when it is absent
/// @return SUCCESS, or PARAM_INVALID when the key is the last parameter
static Status ParserPara(const Command &command, const string &dump_key, string &dump_value) {
  auto iter = std::find(command.cmd_params.begin(), command.cmd_params.end(), dump_key);
  if (iter != command.cmd_params.end()) {
    ++iter;
    if (iter == command.cmd_params.end()) {
      GELOGE(PARAM_INVALID, "Invalid access.");
      return PARAM_INVALID;
    }
    dump_value = *iter;
  }
  return SUCCESS;
}

/// @brief Handle the "dump" command, whose parameters are key/value pairs.
///
/// With status "off"/"OFF" the dump properties of the named model are deleted; otherwise
/// the listed layers, dump path and dump mode are recorded in dump_properties_.
Status ModelManager::HandleDumpCommand(const Command &command) {
  if (command.cmd_params.size() % kDumpCmdPairSize != 0) {
    GELOGE(PARAM_INVALID, "When the cmd_type is 'dump', the size of cmd_params must be a even number.");
    return PARAM_INVALID;
  }

  // Defaults used for any key the command does not carry.
  std::string dump_status("off");
  std::string dump_model(DUMP_ALL_MODEL);
  std::string dump_path("/");
  std::string dump_mode("output");
  std::set dump_layers;

  auto ret = ParserPara(command, DUMP_STATUS, dump_status);
  if (ret != SUCCESS) {
    GELOGE(PARAM_INVALID, "parser dump status failed");
    return FAILED;
  }
  GELOGI("dump status = %s.", dump_status.c_str());

  ret = ParserPara(command, DUMP_MODEL, dump_model);
  if (ret != SUCCESS) {
    GELOGE(PARAM_INVALID, "parser dump model failed");
    return FAILED;
  }
  GELOGI("dump model = %s.", dump_model.c_str());

  if (dump_status == "off" || dump_status == "OFF") {
    dump_properties_.DeletePropertyValue(dump_model);
    return SUCCESS;
  }

  for (size_t i = 0; i < command.cmd_params.size() / kDumpCmdPairSize; ++i) {
    if (command.cmd_params.at(i * kDumpCmdPairSize).find(DUMP_LAYER) != std::string::npos) {
GELOGI("dump layer: %s.", command.cmd_params.at(i * kDumpCmdPairSize + 1).c_str()); dump_layers.insert(command.cmd_params.at(i * kDumpCmdPairSize + 1)); } } ret = ParserPara(command, DUMP_FILE_PATH, dump_path); if (ret != SUCCESS) { GELOGE(PARAM_INVALID, "parser dump path failed"); return FAILED; } if (!dump_path.empty() && dump_path[dump_path.size() - 1] != '/') { dump_path = dump_path + "/"; } dump_path = dump_path + CurrentTimeInStr() + "/"; GELOGI("dump path = %s.", dump_path.c_str()); ret = ParserPara(command, DUMP_MODE, dump_mode); if (ret != SUCCESS) { GELOGE(PARAM_INVALID, "parser dump mode failed"); return FAILED; } GELOGI("dump mode = %s", dump_mode.c_str()); dump_properties_.AddPropertyValue(dump_model, dump_layers); dump_properties_.SetDumpPath(dump_path); dump_properties_.SetDumpMode(dump_mode); return SUCCESS; } Status ModelManager::GetMaxUsedMemory(const uint32_t model_id, uint64_t &max_size) { auto hybrid_model = GetHybridModel(model_id); if (hybrid_model != nullptr) { max_size = 0; return SUCCESS; } std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetMaxUsedMemory Failed, Invalid model id %u!", model_id); max_size = davinci_model->TotalMemSize(); return SUCCESS; } Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &input_desc, vector &output_desc) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); return davinci_model->GetInputOutputDescInfo(input_desc, output_desc); } Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector &input_desc, vector &output_desc, std::vector &inputFormats, std::vector &outputFormats, bool new_model_desc) { std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); if (hybrid_davinci_model != nullptr) { hybrid_davinci_model->SetModelDescVersion(new_model_desc); return 
hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); } std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); davinci_model->SetModelDescVersion(new_model_desc); return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats); } /// /// @ingroup ge /// @brief Get dynamic batch_info /// @param [in] model_id /// @param [out] batch_info /// @return execute result /// Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector> &batch_info, int32_t &dynamic_type) { std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); if (hybrid_davinci_model != nullptr) { return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); } std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetDynamicBatchInfo failed, Invalid model id %u!", model_id); return davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type); } /// /// @ingroup ge /// @brief Get combined dynamic dims info /// @param [in] model_id /// @param [out] batch_info /// @return execute result /// Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector> &batch_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetCombinedDynamicDims Failed, Invalid Model ID %u!", model_id); davinci_model->GetCombinedDynamicDims(batch_info); return SUCCESS; } /// /// @ingroup ge /// @brief Get user designate shape order /// @param [in] model_id /// @param [out] user_input_shape_order /// @return execute result /// Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id, std::vector &user_input_shape_order) { auto hybrid_davinci_model = GetHybridModel(model_id); if 
(hybrid_davinci_model != nullptr) { hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); return SUCCESS; } auto davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id) davinci_model->GetUserDesignateShapeOrder(user_input_shape_order); return SUCCESS; } Status ModelManager::GetCurShape(const uint32_t model_id, std::vector &batch_info, int32_t &dynamic_type) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); davinci_model->GetCurShape(batch_info, dynamic_type); return SUCCESS; } Status ModelManager::GetModelAttr(uint32_t model_id, std::vector &dynamic_output_shape_info) { std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); if (hybrid_davinci_model != nullptr) { hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info); return SUCCESS; } std::shared_ptr davinci_model = GetModel(model_id); GE_CHECK_NOTNULL(davinci_model); davinci_model->GetModelAttr(dynamic_output_shape_info); return SUCCESS; } Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id, vector &input_desc, vector &output_desc, std::vector &inputFormats, std::vector &outputFormats) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id); return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats); } /// /// @ingroup ge /// @brief Get AIPP info /// @param [in] model_id /// @param [in] index /// @param [out] aipp_info /// @return execute result /// Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetAIPPInfo failed, 
invalid model_id is %u.", model_id); return davinci_model->GetAIPPInfo(index, aipp_info); } Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetAIPPInfo failed, invalid model_id is %u.", model_id); return davinci_model->GetAippType(index, type, aipp_index); } Status ModelManager::GenSessionId(uint64_t &session_id) { std::lock_guard lock(session_id_create_mutex_); mmTimeval tv; if (mmGetTimeOfDay(&tv, nullptr) != 0) { GELOGE(INTERNAL_ERROR, "Failed to get current time."); return INTERNAL_ERROR; } session_id = static_cast(tv.tv_sec * 1000000 + tv.tv_usec); // 1000000us session_id_bias_++; // max bais 100. session_id_bias_ = session_id_bias_ % 100; session_id = session_id * 100 + session_id_bias_; GELOGD("Generate new session id: %lu.", session_id); return SUCCESS; } Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model, shared_ptr listener, void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) { GE_CHK_BOOL_RET_STATUS(model.key.empty() || mmAccess2(model.key.c_str(), M_F_OK) == EN_OK, ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is invalid, %s", model.key.c_str(), strerror(errno)); GenModelId(&model_id); shared_ptr davinci_model = nullptr; mmTimespec timespec = mmGetTickCount(); ModelHelper model_helper; Status ret = model_helper.LoadModel(model); if (ret != SUCCESS) { GELOGE(ret, "load model failed."); return ret; } do { GeModelPtr ge_model = model_helper.GetGeModel(); try { davinci_model = std::make_shared(model.priority, listener); } catch (std::bad_alloc &) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed"); return ACL_ERROR_GE_MEMORY_ALLOCATION; } catch (...) 
{ GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise"); return INTERNAL_ERROR; } ret = davinci_model->Assign(ge_model); if (ret != SUCCESS) { GELOGW("assign model failed."); break; } davinci_model->SetId(model_id); int32_t device_id = 0; rtError_t rt_ret = rtGetDevice(&device_id); if (rt_ret != RT_ERROR_NONE || device_id < 0) { GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id); return RT_ERROR_TO_GE_STATUS(rt_ret); } davinci_model->SetDeviceId(device_id); davinci_model->SetOmName(model.om_name); if (DumpManager::GetInstance().GetDumpProperties().IsDumpOpen()) { davinci_model->SetDumpProperties(DumpManager::GetInstance().GetDumpProperties()); } else { davinci_model->SetDumpProperties(dump_properties_); } /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. /// These session_ids come from the same model, so the values of session_id are the same. /// Update session_id for infer in load model to avoid the same session_id. uint64_t new_session_id; ret = GenSessionId(new_session_id); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Generate session_id for infer failed."); ret = davinci_model->UpdateSessionId(new_session_id); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "Update session_id for infer failed."); ret = davinci_model->Init(dev_ptr, mem_size, weight_ptr, weight_size); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, break, "DavinciInit failed."); InsertModel(model_id, davinci_model); GELOGI("Parse model %u success.", model_id); davinci_model->SetProfileTime(MODEL_LOAD_START, (timespec.tv_sec * 1000 * 1000 * 1000 + timespec.tv_nsec)); // 1000 ^ 3 converts second to nanosecond davinci_model->SetProfileTime(MODEL_LOAD_END); GE_IF_BOOL_EXEC(ret == SUCCESS, device_count++); return SUCCESS; } while (0); return ret; } /// /// @ingroup ge /// @brief ACL case, Load task list with queue. /// @param [out] model_id: model id for manager. 
/// @param [in] model_data: Model data load from offline model file. /// @param [in] input_que_ids: input queue ids from user, num equals Data Op. /// @param [in] output_que_ids: input queue ids from user, num equals NetOutput Op. /// @return: 0 for success / others for fail /// Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_data, const std::vector &input_queue_ids, const std::vector &output_queue_ids) { GE_CHK_BOOL_RET_STATUS(model_data.key.empty() || mmAccess2(model_data.key.c_str(), M_F_OK) == EN_OK, ACL_ERROR_GE_PARAM_INVALID, "input key file path %s is not valid, %s", model_data.key.c_str(), strerror(errno)); ModelHelper model_helper; Status ret = model_helper.LoadModel(model_data); if (ret != SUCCESS) { GELOGE(ret, "load model failed."); return ret; } shared_ptr davinci_model = MakeShared(model_data.priority, nullptr); if (davinci_model == nullptr) { GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "create model failed."); return ACL_ERROR_GE_MEMORY_ALLOCATION; } ret = davinci_model->Assign(model_helper.GetGeModel()); if (ret != SUCCESS) { GELOGE(ret, "assign model failed."); return ret; } /// In multi-threaded inference, using the same session_id among multiple threads may cause some threads to fail. /// These session_ids come from the same model, so the values of session_id are the same. /// Update session_id for infer in load model to avoid the same session_id. 
uint64_t new_session_id; ret = GenSessionId(new_session_id); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Generate session_id for infer failed."); ret = davinci_model->UpdateSessionId(new_session_id); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Update session_id for infer failed."); GenModelId(&model_id); davinci_model->SetId(model_id); ret = davinci_model->SetQueIds(input_queue_ids, output_queue_ids); if (ret != SUCCESS) { GELOGE(ret, "set model queue ids failed."); return ret; } davinci_model->SetDumpProperties(dump_properties_); ret = davinci_model->Init(); if (ret != SUCCESS) { GELOGE(ret, "init model failed."); return ret; } InsertModel(model_id, davinci_model); GELOGI("Parse model %u success.", model_id); return SUCCESS; } /// /// @ingroup domi_ome /// @brief ACL case, not start new thread, return result /// @param [in] model_id mode id /// @param [in] stream model stream /// @param [in] async_mode is asynchronize mode. /// @param [in] input_data input data /// @param [in] input_desc description of input data /// @param [out] output_data output data /// @param [out] output_desc description of output data /// Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data, const std::vector &input_desc, OutputData &output_data, std::vector &output_desc) { std::shared_ptr hybrid_davinci_model = GetHybridModel(model_id); if (hybrid_davinci_model != nullptr) { auto inputs = input_data.blobs; auto outputs = output_data.blobs; Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream); if (status == SUCCESS) { GELOGI("Execute model %u success.", model_id); } return status; } std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id); if (davinci_model->NeedDestroyAicpuKernel()) { GELOGI("Start to destroy specified aicpu kernel."); // Zero copy is enabled by 
default, no need to judge. uint64_t session_id_davinci = davinci_model->GetSessionId(); uint32_t model_id_davinci = davinci_model->GetModelId(); Status status = DestroyAicpuKernel(session_id_davinci, model_id_davinci); if (status != SUCCESS) { GELOGW("Destroy specified aicpu kernel failed, session id is %lu, model id is %u.", session_id_davinci, model_id_davinci); } } Status status = davinci_model->NnExecute(stream, async_mode, input_data, output_data); if (status == SUCCESS) { GELOGD("Execute model %u success.", model_id); } return status; } Status ModelManager::CreateAicpuSession(uint64_t session_id) { std::lock_guard lock(sess_ids_mutex_); auto it = sess_ids_.find(session_id); // never been created by any model if (it == sess_ids_.end()) { Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_SESSION_CREATE, session_id, 0); if (ret == SUCCESS) { (void)sess_ids_.insert(session_id); GELOGI("The session: %lu create success.", session_id); } return ret; } return SUCCESS; } Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) { GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str()); std::lock_guard lock(cust_aicpu_mutex_); CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr()); if (aicpu_kernel == nullptr) { GELOGE(INTERNAL_ERROR, "cust aicpu op %s can't find kernel!", op_desc->GetName().c_str()); return INTERNAL_ERROR; } // get current context rtContext_t rt_cur_ctx = nullptr; auto rt_error = rtCtxGetCurrent(&rt_cur_ctx); if (rt_error != RT_ERROR_NONE) { GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_error)); return RT_FAILED; } // use current context as resource key uintptr_t resource_id = reinterpret_cast(rt_cur_ctx); auto it = cust_aicpu_so_.find(resource_id); if (it == cust_aicpu_so_.end()) { std::map new_so_name; new_so_name.insert({so_name, aicpu_kernel}); 
cust_aicpu_so_[resource_id] = new_so_name; GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id); return SUCCESS; } auto it_so_name = it->second.find(so_name); if (it_so_name == it->second.end()) { it->second.insert({so_name, aicpu_kernel}); GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id); } return SUCCESS; } Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) { std::lock_guard lock(cust_aicpu_mutex_); if (cust_aicpu_so_.size() == 0) return SUCCESS; // get current context rtContext_t rt_cur_ctx = nullptr; auto rt_error = rtCtxGetCurrent(&rt_cur_ctx); if (rt_error != RT_ERROR_NONE) { GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast(rt_error)); return RT_FAILED; } uintptr_t resource_id = reinterpret_cast(rt_cur_ctx); auto it = cust_aicpu_so_.find(resource_id); if (it == cust_aicpu_so_.end()) { GELOGI("Cust aicpu so map is empty, context id %lu", resource_id); return SUCCESS; } vector allocated_mem; rtError_t status; rtStream_t stream = nullptr; vector v_cust_so; void *args = nullptr; for (const auto &it_so : it->second) { const void *aicpu_data = it_so.second->GetBinData(); uint32_t aicpu_data_length = it_so.second->GetBinDataSize(); string so_name = it_so.first; void *d_aicpu_data = nullptr; void *d_so_name = nullptr; status = rtMalloc(&d_aicpu_data, aicpu_data_length, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_aicpu_data); status = rtMalloc(&d_so_name, so_name.size(), RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(d_so_name); GE_CHK_RT(rtMemcpy(d_aicpu_data, aicpu_data_length, aicpu_data, aicpu_data_length, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtMemcpy(d_so_name, so_name.size(), reinterpret_cast(so_name.c_str()), so_name.size(), 
RT_MEMCPY_HOST_TO_DEVICE)); CustAicpuSoBuf cust_aicpu_so_buf; cust_aicpu_so_buf.kernelSoBuf = static_cast(reinterpret_cast(d_aicpu_data)); cust_aicpu_so_buf.kernelSoBufLen = aicpu_data_length; cust_aicpu_so_buf.kernelSoName = static_cast(reinterpret_cast(d_so_name)); cust_aicpu_so_buf.kernelSoNameLen = so_name.size(); v_cust_so.push_back(cust_aicpu_so_buf); } if (kernel_name == kDeleteCustOp) { (void)cust_aicpu_so_.erase(it); } uint32_t args_size = sizeof(CustAicpuSoBuf) * v_cust_so.size(); status = rtMalloc(&args, args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(args); GE_CHK_RT(rtMemcpy(args, args_size, v_cust_so.data(), args_size, RT_MEMCPY_HOST_TO_DEVICE)); BatchLoadOpFromBufArgs batch_cust_so; batch_cust_so.soNum = v_cust_so.size(); batch_cust_so.args = static_cast(reinterpret_cast(args)); void *batch_args = nullptr; uint32_t batch_args_size = sizeof(BatchLoadOpFromBufArgs); status = rtMalloc(&batch_args, batch_args_size, RT_MEMORY_HBM); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt failed, status: 0x%x", status); return RT_ERROR_TO_GE_STATUS(status); } allocated_mem.push_back(batch_args); GE_CHK_RT(rtMemcpy(batch_args, batch_args_size, static_cast(&batch_cust_so), batch_args_size, RT_MEMCPY_HOST_TO_DEVICE)); GE_CHK_RT(rtStreamCreate(&stream, 0)); GE_CHK_RT(rtCpuKernelLaunch(nullptr, kernel_name.c_str(), 1, batch_args, batch_args_size, nullptr, stream)); status = rtStreamSynchronize(stream); if (status != RT_ERROR_NONE) { GELOGE(RT_FAILED, "Call rt stream sync failed, status: 0x%x", status); return RT_ERROR_TO_GE_STATUS(status); } std::function callback = [&]() { for (auto mem : allocated_mem) { GE_CHK_RT(rtFree(mem)); } GE_CHK_RT(rtStreamDestroy(stream)); }; GE_MAKE_GUARD(release, callback); GELOGI("Cpu kernel launch task success."); return SUCCESS; } Status ModelManager::ClearAicpuSo() { 
GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kDeleteCustOp), "delete cust op so failed."); return SUCCESS; } Status ModelManager::LaunchCustAicpuSo() { GE_CHK_STATUS_RET(LaunchKernelCustAicpuSo(kBatchLoadBuf), "launch cust op so failed."); return SUCCESS; } /// /// @ingroup ge /// @brief get model memory size and weight /// @param [in] const ModelData model: model type /// @param [out] size_t memSize: model memory usage /// size_t weightSize: model weight and memory size /// @return SUCCESS success / others failure /// Status ModelManager::GetModelMemAndWeightSize(const ModelData &model, size_t &mem_size, size_t &weight_size) { uint8_t *model_data = nullptr; uint32_t model_len = 0; Status ret = DavinciModelParser::ParseModelContent(model, model_data, model_len); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "parse model content failed!"); OmFileLoadHelper om_file_helper; ret = om_file_helper.Init(model_data, model_len); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "om file helperInit failed!"); auto partition_table = reinterpret_cast(model_data); if (partition_table->num == 1) { GELOGE(ACL_ERROR_GE_PARAM_INVALID, "om model is error,please use executable om model"); return ACL_ERROR_GE_PARAM_INVALID; } ModelPartition task_partition; if (om_file_helper.GetModelPartition(ModelPartitionType::TASK_INFO, task_partition) != SUCCESS) { GELOGE(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "get task model partition failed."); return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; } std::shared_ptr model_task_def = MakeShared(); if (model_task_def == nullptr) { return MEMALLOC_FAILED; } if (task_partition.size != 0) { if (!ReadProtoFromArray(task_partition.data, static_cast(task_partition.size), model_task_def.get())) { GELOGE(ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED, "ReadProtoFromArray failed."); return ACL_ERROR_GE_EXEC_LOAD_TASK_PARTITION_FAILED; } } ModelPartition partition_weight; ret = 
om_file_helper.GetModelPartition(ModelPartitionType::WEIGHTS_DATA, partition_weight); GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_EXEC_LOAD_WEIGHT_PARTITION_FAILED, "Get weight partition failed. ret = %u", ret); mem_size = model_task_def->memory_size(); weight_size = partition_weight.size; return SUCCESS; } void ModelManager::GenModelId(uint32_t *id) { if (id == nullptr) { return; } std::lock_guard lock(map_mutex_); *id = ++max_model_id_; } Status ModelManager::GetOrigInputInfo(uint32_t model_id, uint32_t index, OriginInputInfo &orig_input_info) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetOrigInputInfo failed, invalid model_id is %u.", model_id); return davinci_model->GetOrigInputInfo(index, orig_input_info); } Status ModelManager::GetAllAippInputOutputDims(uint32_t model_id, uint32_t index, std::vector &input_dims, std::vector &output_dims) { std::shared_ptr davinci_model = GetModel(model_id); GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "GetAllAippInputOutputDims failed, invalid model_id is %u.", model_id); return davinci_model->GetAllAippInputOutputDims(index, input_dims, output_dims); } bool ModelManager::IsDynamicShape(uint32_t model_id) { auto model = GetHybridModel(model_id); return model != nullptr; } ge::Status ModelManager::SyncExecuteModel(uint32_t model_id, const vector &inputs, vector &outputs) { auto model = GetHybridModel(model_id); if (model == nullptr) { GELOGE(FAILED, "Hybrid model not found. 
model id = %u.", model_id); return FAILED; } return model->Execute(inputs, outputs); } Status ModelManager::GetOpDescInfo(uint32_t device_id, uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) { for (const auto &model : model_map_) { auto davinci_model = model.second; if (davinci_model->GetDeviceId() == device_id) { GELOGI("Start to GetOpDescInfo of device_id: %u.", device_id); if (davinci_model->GetOpDescInfo(stream_id, task_id, op_desc_info)) { GELOGI("Find specific node of stream_id: %u, task_id: %u.", stream_id, task_id); return SUCCESS; } } } return FAILED; } Status ModelManager::EnableExceptionDump(const std::map &options) { auto iter = options.find(OPTION_EXEC_ENABLE_EXCEPTION_DUMP); if (iter != options.end()) { GELOGI("Find option enable_exeception_dump is %s", iter->second.c_str()); if (iter->second == "1") { rtError_t rt_ret = rtSetTaskFailCallback(reinterpret_cast(ExceptionCallback)); if (rt_ret != RT_ERROR_NONE) { GELOGE(RT_FAILED, "rtSetTaskFailCallback failed"); return RT_ERROR_TO_GE_STATUS(rt_ret); } } else { GELOGI("Option enable exception dump is %s", iter->second.c_str()); } } else { GELOGI("Not find option enable exception dump"); } return SUCCESS; } } // namespace ge