/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <thread>
#include <vector>

#include "common/ge_types.h"
#include "common/helper/model_helper.h"
#include "common/helper/om_file_helper.h"
#include "common/opskernel/ge_task_info.h"
#include "common/properties_manager.h"
#include "common/types.h"
#include "framework/common/util.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/new_model_manager/aipp_utils.h"
#include "graph/load/new_model_manager/data_dumper.h"
#include "graph/load/new_model_manager/data_inputer.h"
#include "graph/load/new_model_manager/model_utils.h"
#include "graph/load/new_model_manager/zero_copy_offset.h"
#include "graph/load/new_model_manager/zero_copy_task.h"
#include "graph/model.h"
#include "graph/node.h"
#include "graph/op_desc.h"
#include "graph/operator.h"
#include "graph/utils/attr_utils.h"
#include "graph/utils/tensor_utils.h"
#include "mmpa/mmpa_api.h"
#include "proto/task.pb.h"
#include "task_info/task_info.h"
#include "graph/common/local_context.h"

namespace ge {
// op debug need 2048 bits buffer
const size_t kOpDebugMemorySize = 2048UL;
const size_t kDebugP2pSize = 8UL;

typedef enum tagModelProcStage {
  MODEL_LOAD_START = 1,
  MODEL_LOAD_END,
  MODEL_PRE_PROC_START,
  MODEL_PRE_PROC_END,
  MODEL_INFER_START,
  MODEL_INFER_END,
  MODEL_AFTER_PROC_START,
  MODEL_AFTER_PROC_END,
  MODEL_PROC_INVALID,
} ModelProcStage;

struct timeInfo {
  uint32_t modelId;
  int64_t processBeginTime;
  int64_t processEndTime;
  int64_t inferenceBeginTime;
  int64_t inferenceEndTime;
  int64_t dumpBeginTime;
  int64_t dumpEndTime;
};

// For super kernel
struct SuperKernelTaskInfo {
  uint32_t last_block_dim;
  uint32_t last_args_size;
  uint32_t last_task_id;
  uint32_t last_stream_id;
  void *last_stream;
  void *last_sm_desc;
  std::vector<void *> kernel_list;
  std::vector<void *> arg_list;
  std::vector<uint32_t> dump_flag_list;
  std::vector<OpDescPtr> op_desc_list;
  std::vector<uintptr_t> dump_args_list;
  uint32_t last_dump_flag;
  int64_t last_group_key;
  uintptr_t last_dump_args;
  OpDescPtr last_op;
};

struct TaskMemInfo {
  int64_t input_size{0};
  int64_t output_size{0};
  int64_t weight_size{0};
  int64_t workspace_size{0};
  int64_t total_size{0};
};

struct ProfileInfo {
  FusionOpInfo fusion_info;
  TaskMemInfo memory_info;
  uint32_t task_count{0};
};

enum ExecuteMode {
  INITIALIZATION,
  SYNCHRONIZATION,
  ASYNCHRONIZATION,
};

// comments
class DavinciModel {
 public:
  ///
  /// @ingroup ge
  /// @brief DavinciModel constructor
  /// @author
  ///
  DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener);

  ///
  /// @ingroup ge
  /// @brief DavinciModel destructor, free Parse and Init resources
  /// @author
  ///
  ~DavinciModel();

  ///
  /// @ingroup ge
  /// @brief apply model to model_def_
  ///
  Status Assign(const GeModelPtr &ge_model);

  ///
  /// @ingroup ge
  /// @brief DavinciModel initialization, including Stream, ccHandle, Event, DataInputer, etc.
  /// @return execute result
  /// @author
  ///
  Status Init(void *dev_ptr = nullptr, size_t memsize = 0, void *weight_ptr = nullptr, size_t weightsize = 0);

  ///
  /// @ingroup ge
  /// @brief ACL case, load task list with queue.
  /// @param [in] input_queue_ids: input queue ids from user, nums equal Data Op.
  /// @param [in] output_queue_ids: output queue ids from user, nums equal NetOutput Op.
  /// @return: 0 for success / others for fail
  ///
  Status SetQueIds(const std::vector<uint32_t> &input_queue_ids, const std::vector<uint32_t> &output_queue_ids);

  ///
  /// @ingroup ge
  /// @brief Get model ID.
  /// @return model ID
  ///
  uint32_t Id() const { return model_id_; }

  ///
  /// @ingroup ge
  /// @brief Set model ID.
  /// @return void
  ///
  void SetId(uint32_t model_id) { model_id_ = model_id; }

  static void *Run(DavinciModel *model_pointer);

  ///
  /// @ingroup ge
  /// @brief NnExecute
  /// @param [in] stream  execute stream
  /// @param [in] async_mode  whether to run in asynchronous mode
  /// @param [in] input_data  model input data
  /// @param [out] output_data  model output data
  ///
  Status NnExecute(rtStream_t stream, bool async_mode, const InputData &input_data, OutputData &output_data);

  ///
  /// @ingroup ge
  /// @brief lock mutex run flag
  /// @author
  ///
  void LockRunFlg() { mux_run_flg_.lock(); }

  ///
  /// @ingroup ge
  /// @brief unlock mutex run flag
  /// @author
  ///
  void UnlockRunFlg() { mux_run_flg_.unlock(); }

  ///
  /// @ingroup ge
  /// @brief get DataInputer
  /// @return DataInputer pointer
  ///
  DataInputer *const GetDataInputer() const { return data_inputer_; }

  // get Stream number
  uint32_t StreamNum() const { return runtime_param_.stream_num; }

  // get Event number
  uint32_t EventNum() const { return runtime_param_.event_num; }

  // get Label number
  uint32_t LabelNum() const { return runtime_param_.label_num; }

  // get batch number
  uint32_t BatchNum() const { return runtime_param_.batch_num; }

  // get session id
  uint64_t SessionId() const { return runtime_param_.session_id; }

  // get model priority
  int32_t Priority() const { return priority_; }

  // get total mem size
  size_t TotalMemSize() const { return runtime_param_.mem_size; }

  const std::map &P2PMemInfos() const { return runtime_param_.memory_infos; }

  // model name
  string Name() const { return name_; }

  // om_name
  string OmName() const { return om_name_; }

  // version
  uint32_t Version() const { return version_; }

  // get total weights mem size
  size_t TotalWeightsMemSize() const { return runtime_param_.weight_size; }

  size_t TotalVarMemSize() const { return runtime_param_.var_size; }

  // get base memory address
  uint8_t *MemBase() { return mem_base_; }

  // get weight base memory address
  uint8_t *WeightsMemBase() { return weights_mem_base_; }

  uint8_t *VarMemBase() { return var_mem_base_; }

  // get Event list
  const vector<rtEvent_t> &GetEventList() const { return event_list_; }

  const vector<rtStream_t> &GetStreamList() const { return stream_list_; }

  const vector<rtLabel_t> &GetLabelList() const { return label_list_; }

  Status DestroyThread();

  // Get Data Op.
  const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }

  // get Op
  OpDescPtr GetOpByIndex(uint32_t index) const {
    if (op_list_.find(index) == op_list_.end()) {
      return nullptr;
    }
    return op_list_.at(index);
  }

  OpDescPtr GetVariableOp(const string &name) {
    for (auto op_desc : variable_op_list_) {
      if (op_desc != nullptr && op_desc->GetName() == name) {
        return op_desc;
      }
    }
    return nullptr;
  }

  // get task info for profiling
  const std::vector<TaskDescInfo> &GetTaskDescInfo() const { return task_desc_info_; }

  // get updated task info list
  std::vector<TaskInfoPtr> GetTaskList() { return task_list_; }
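
  ///
  /// Usage sketch (illustrative only, not part of this interface): a minimal load-and-execute
  /// sequence assembled from the declarations above. `ge_model`, `stream`, `input_data` and
  /// `output_data` are assumed to be prepared by the caller.
  ///
  ///   std::shared_ptr<ModelListener> listener = nullptr;  // hypothetical: no async listener
  ///   DavinciModel model(0, listener);                     // priority 0
  ///   if (model.Assign(ge_model) == SUCCESS && model.Init() == SUCCESS) {
  ///     (void)model.NnExecute(stream, false, input_data, output_data);  // synchronous execute
  ///   }
  ///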
  // Modified from KernelTaskInfo.
  SuperKernelTaskInfo &GetSuperKernelTaskInfo() { return skt_info_; }

  ///
  /// @ingroup ge
  /// @brief get model input and output format
  /// @return Format current model input and output format
  ///
  Format GetFormat();

  rtModel_t GetRtModelHandle() const { return rt_model_handle_; }

  rtStream_t GetRtModelStream() const { return rt_model_stream_; }

  uint64_t GetRtBaseAddr() const { return runtime_param_.logic_mem_base; }

  uint64_t GetRtWeightAddr() const { return runtime_param_.logic_weight_base; }

  uint64_t GetRtVarAddr() const { return runtime_param_.logic_var_base; }

  uint32_t GetFlowctrlIndex(uint32_t op_index);

  void PushHcclStream(rtStream_t value);

  bool IsBroadCastOpData(const NodePtr &var_node);

  ///
  /// @ingroup ge
  /// @brief For TVM Op, avoid Addr Reuse.
  /// @return void*
  ///
  const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = "");

  ///
  /// @ingroup ge
  /// @brief get model input and output desc info
  /// @param [out] input_desc  model input size
  /// @param [out] output_desc  model output size
  /// @return execute result
  ///
  Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc);

  Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc, vector<InputOutputDescInfo> &output_desc,
                                std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats);

  ///
  /// @ingroup ge
  /// @brief Get dynamic batch_info
  /// @param [out] batch_info
  /// @param [out] dynamic_type
  /// @return execute result
  ///
  Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) const;

  ///
  /// @ingroup ge
  /// @brief Get combined dynamic dims info
  /// @param [out] batch_info
  /// @return None
  ///
  void GetCombinedDynamicDims(std::vector<std::vector<int64_t>> &batch_info) const;

  void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) const;

  void GetCurShape(std::vector<int64_t> &batch_info, int32_t &dynamic_type);

  void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);

  ///
  /// @ingroup ge
  /// @brief Get AIPP input info
  /// @param [in] index
  /// @param [out] aipp_info
  /// @return execute result
  ///
  Status GetAIPPInfo(uint32_t index, AippConfigInfo &aipp_info);

  Status GetAippType(uint32_t index, InputAippType &type, size_t &aipp_index);

  ///
  /// @ingroup ge
  /// @brief Get model_id.
  /// @return model_id
  ///
  uint32_t GetModelId() const { return model_id_; }
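
  ///
  /// Illustrative query sketch (not part of this interface): how a caller might read the
  /// supported dynamic-batch gears through the getters above. Variable names are assumptions.
  ///
  ///   std::vector<std::vector<int64_t>> batch_info;
  ///   int32_t dynamic_type = 0;
  ///   if (model.GetDynamicBatchInfo(batch_info, dynamic_type) == SUCCESS) {
  ///     // each inner vector is one supported gear, e.g. {1}, {8}, {16} for dynamic batch
  ///   }
  ///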
  ///
  /// @ingroup ge
  /// @brief get unique identification for op when load two or more models
  /// @param [in] op_desc : current op.
  /// @param [in] string identification: unique identification for current op.
  /// @return None
  ///
  void GetUniqueId(const OpDescPtr &op_desc, std::string &unique_identification);

  ///
  /// @ingroup ge
  /// @brief get model input and output desc for zero copy
  /// @param [out] input_desc  model input size
  /// @param [out] output_desc  model output size
  /// @return execute result
  ///
  Status GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInfo> &input_desc,
                                           vector<InputOutputDescInfo> &output_desc,
                                           std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &output_formats);

  Status ReturnResult(uint32_t data_id, const bool rslt_flg, const bool seq_end_flg, OutputData *output_data);

  Status ReturnNoOutput(uint32_t data_id);

  Status ModelRunStart();

  ///
  /// @ingroup ge
  /// @brief stop run model
  /// @return Status
  ///
  Status ModelRunStop();

  ///
  /// @ingroup ge
  /// @brief model run flag
  /// @return Status
  ///
  bool RunFlag() const { return run_flg_; }

  Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats);

  ///
  /// @ingroup ge
  /// @brief Set Session Id
  /// @return void
  ///
  void SetSessionId(uint64_t session_id) { session_id_ = session_id; }

  ///
  /// @ingroup ge
  /// @brief Get Session Id
  /// @return sessionID
  ///
  uint64_t GetSessionId() const { return session_id_; }

  ///
  /// @ingroup ge
  /// @brief SetDeviceId
  /// @return void
  ///
  void SetDeviceId(uint32_t device_id) { device_id_ = device_id; }

  ///
  /// @ingroup ge
  /// @brief Get device Id
  /// @return device id
  ///
  uint32_t GetDeviceId() const { return device_id_; }

  bool NeedDestroyAicpuKernel() const { return need_destroy_aicpu_kernel_; }

  Status UpdateSessionId(uint64_t session_id);

  const RuntimeParam &GetRuntimeParam() { return runtime_param_; }

  int32_t GetDataInputTid() const { return dataInputTid; }

  void SetDataInputTid(int32_t data_input_tid) { dataInputTid = data_input_tid; }

  void DisableZeroCopy(const void *addr);

  bool GetOpDugReg() const { return is_op_debug_reg_; }
  ///
  /// @ingroup ge
  /// @brief Save outside address of Data or NetOutput used info for ZeroCopy.
  /// @param [in] const OpDescPtr &op_desc: current op desc
  /// @param [in] const std::vector<void *> &outside_addrs: address of task
  /// @param [in] const void *args_offset: arguments address save the address.
  /// @return None.
  ///
  void SetZeroCopyAddr(const OpDescPtr &op_desc, const std::vector<void *> &outside_addrs, const void *info,
                       void *args, size_t size, size_t offset);

  void SetDynamicSize(const std::vector<uint64_t> &batch_num, int32_t dynamic_type);

  bool GetL1FusionEnableOption() { return is_l1_fusion_enable_; }

  void SetProfileTime(ModelProcStage stage, int64_t endTime = 0);

  int64_t GetLoadBeginTime() { return load_begin_time_; }

  int64_t GetLoadEndTime() { return load_end_time_; }

  Status ReportProfilingData();

  void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
    data_dumper_.SaveDumpOpInfo(model_param, op, task_id, stream_id);
  }

  void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args) {
    data_dumper_.SaveDumpTask(task_id, stream_id, op_desc, args);
  }

  void SetEndGraphId(uint32_t task_id, uint32_t stream_id);

  DavinciModel &operator=(const DavinciModel &model) = delete;

  DavinciModel(const DavinciModel &model) = delete;

  const map<int64_t, std::vector<rtStream_t>> &GetHcclFolowStream() { return main_follow_stream_mapping_; }

  void SaveHcclFollowStream(int64_t main_stream_id, rtStream_t stream);

  void InitRuntimeParams();

  Status InitVariableMem();

  void UpdateMemBase(uint8_t *mem_base) {
    runtime_param_.mem_base = mem_base;
    mem_base_ = mem_base;
  }

  void SetTotalArgsSize(uint32_t args_size) { total_args_size_ += args_size; }

  uint32_t GetTotalArgsSize() { return total_args_size_; }

  void *GetCurrentArgsAddr(uint32_t offset) {
    void *cur_args = static_cast<uint8_t *>(args_) + offset;
    return cur_args;
  }

  void SetTotalIOAddrs(const vector<void *> &io_addrs) {
    total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
  }

  void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }

  uint32_t GetHybridArgsSize() { return total_hybrid_args_size_; }

  void *GetCurrentHybridArgsAddr(uint32_t offset) {
    void *cur_args = static_cast<uint8_t *>(hybrid_addrs_) + offset;
    return cur_args;
  }

  void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);

  int64_t GetFixedAddrsSize(string tensor_name);

  void *GetCurrentFixedAddr(int64_t offset) const {
    void *cur_addr = static_cast<uint8_t *>(fixed_addrs_) + offset;
    return cur_addr;
  }

  uint32_t GetFixedAddrOutputIndex(string tensor_name) {
    if (tensor_name_to_peer_output_index_.find(tensor_name) != tensor_name_to_peer_output_index_.end()) {
      return tensor_name_to_peer_output_index_[tensor_name];
    }
    return UINT32_MAX;
  }

  void SetKnownNode(bool known_node) { known_node_ = known_node; }

  bool IsKnownNode() { return known_node_; }

  Status MallocKnownArgs();

  Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);

  Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);

  Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);

  void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }

  Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);

  Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
                                   std::vector<InputOutputDims> &output_dims);

  void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; }

  // om file name
  void SetOmName(string om_name) { om_name_ = om_name; }

  void SetDumpProperties(const DumpProperties &dump_properties) { data_dumper_.SetDumpProperties(dump_properties); }

  const DumpProperties &GetDumpProperties() const { return data_dumper_.GetDumpProperties(); }
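
  ///
  /// Sketch of the assumed known-node (known-shape) argument flow, inferred only from the
  /// declarations above; the exact call order used by GE may differ:
  ///
  ///   model.SetKnownNode(true);
  ///   if (model.MallocKnownArgs() == SUCCESS) {            // reserve args memory before task distribution
  ///     (void)model.UpdateKnownNodeArgs(inputs, outputs);  // refresh per-execution io addrs (assumed vectors of void *)
  ///   }
  ///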
  bool GetOpDescInfo(uint32_t stream_id, uint32_t task_id, OpDescInfo &op_desc_info) const {
    return data_dumper_.GetOpDescInfo(stream_id, task_id, op_desc_info);
  }

  Status InitInputOutputForDynamic(const ComputeGraphPtr &compute_graph);

 private:
  // memory address of weights
  uint8_t *weights_mem_base_;
  uint8_t *var_mem_base_;
  // memory address of model
  uint8_t *mem_base_;
  uint8_t *p2p_mem_base_;
  bool is_inner_mem_base_;
  bool is_inner_weight_base_;
  bool is_inner_p2p_mem_base_;
  // input data manager
  DataInputer *data_inputer_;
  int64_t load_begin_time_;
  int64_t load_end_time_;
  struct timeInfo time_info_;
  int32_t dataInputTid;

  ///
  /// @ingroup ge
  /// @brief Check input size and model op size.
  /// @param [in] const int64_t &input_size: input size.
  /// @param [in] const int64_t &op_size: model op size.
  /// @param [in] is_dynamic: dynamic batch input flag.
  /// @return true if success
  ///
  bool CheckInputAndModelSize(const int64_t &input_size, const int64_t &op_size, bool is_dynamic);

  ///
  /// @ingroup ge
  /// @brief Set copy only for No task feed NetOutput address.
  /// @return None.
  ///
  void SetCopyOnlyOutput();

  ///
  /// @ingroup ge
  /// @brief Copy Input/Output to model for direct use.
  /// @param [in] const InputData &input_data: user input data info.
  /// @param [in/out] OutputData &output_data: user output data info.
  /// @param [in] bool is_dynamic: whether the input is dynamic.
  /// @return SUCCESS handle successfully / others handle failed
  ///
  Status CopyModelData(const InputData &input_data, OutputData &output_data, bool is_dynamic);

  ///
  /// @ingroup ge
  /// @brief Copy Data addr to model for direct use.
  /// @param [in] data_info: model memory addr/size map { data_index, { tensor_size, tensor_addr } }.
  /// @param [in] is_input: input data or output data
  /// @param [in] blobs: user input/output data list.
  /// @param [in] is_dynamic: whether the input is dynamic.
  /// @param [in] batch_label: batch label for multi-batch scenes
  /// @return SUCCESS handle successfully / others handle failed
  ///
  Status UpdateIoTaskArgs(const std::map<uint32_t, ZeroCopyOffset> &data_info, bool is_input,
                          const vector<DataBuffer> &blobs, bool is_dynamic, const string &batch_label);

  Status CopyInputData(const InputData &input_data, bool device_data = false);

  Status CopyOutputData(uint32_t data_id, OutputData &output_data, rtMemcpyKind_t kind);

  Status SyncVarData();

  Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);

  Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);

  void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);

  void SetInputDimsInfo(const vector<int64_t> &model_input_dims, Format &format, InputOutputDescInfo &input);

  Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats);

  Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo);

  void UnbindHcomStream();

  Status DistributeTask();

  uint8_t *MallocFeatureMapMem(size_t data_size);

  uint8_t *MallocWeightsMem(size_t weights_size);

  uint8_t *MallocP2PMem(size_t p2p_data_size);

  void FreeFeatureMapMem();

  void FreeWeightsMem();

  void FreeP2PMem();

  void ReleaseTask();

  void UnbindTaskSinkStream();

  bool IsAicpuKernelConnectSpecifiedLayer();

  ///
  /// @ingroup ge
  /// @brief Reduce memory usage after task sink.
  /// @return: void
  ///
  void Shrink();

  ///
  /// @ingroup ge
  /// @brief Traverse all nodes and do some init.
  /// @param [in] compute_graph: ComputeGraph to load.
  /// @return Status
  ///
  Status InitNodes(const ComputeGraphPtr &compute_graph);
  ///
  /// @ingroup ge
  /// @brief Data Op Initialize.
  /// @param [in] NodePtr: Data Op.
  /// @param [in/out] data_op_index: index of the Data Op.
  /// @return Status
  ///
  Status InitDataOp(const NodePtr &node, uint32_t &data_op_index, map<uint32_t, OpDescPtr> &data_by_index);

  ///
  /// @ingroup ge
  /// @brief Sort Data op list by index.
  /// @param [in] data_by_index: map of Data Op.
  /// @return
  ///
  void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index);

  ///
  /// @ingroup ge
  /// @brief NetOutput Op Initialize.
  /// @param [in] NodePtr: NetOutput Op.
  /// @return Status
  ///
  Status InitNetOutput(const NodePtr &node);

  ///
  /// @ingroup ge
  /// @brief Constant Op Init.
  /// @return Status
  ///
  Status InitConstant(const OpDescPtr &op_desc);

  Status InitVariable(const OpDescPtr &op_desc);

  /// @ingroup ge
  /// @brief LabelSet Op Initialize.
  /// @param [in] op_desc: LabelSet Op descriptor.
  /// @return Status
  Status InitLabelSet(const OpDescPtr &op_desc);

  Status InitStreamSwitch(const OpDescPtr &op_desc);

  Status InitStreamActive(const OpDescPtr &op_desc);

  Status InitStreamSwitchN(const OpDescPtr &op_desc);

  ///
  /// @ingroup ge
  /// @brief Case Op Init.
  /// @return Status
  ///
  Status InitCase(const OpDescPtr &op_desc);

  Status SetDynamicBatchInfo(const OpDescPtr &op_desc, uint32_t batch_num);

  ///
  /// @ingroup ge
  /// @brief TVM Op Init.
  /// @return Status
  ///
  Status InitTbeHandle(const OpDescPtr &op_desc);

  void StoreTbeHandle(const std::string &handle_key);

  void CleanTbeHandle();

  ///
  /// @ingroup ge
  /// @brief Make active stream list and bind to model.
  /// @return: 0 for success / others for fail
  ///
  Status BindModelStream();

  ///
  /// @ingroup ge
  /// @brief Init model stream for NN model.
  /// @return Status
  ///
  Status InitModelStream(rtStream_t stream);

  ///
  /// @ingroup ge
  /// @brief ACL, Load task list with queue entrance.
  /// @return: 0 for success / others for fail
  ///
  Status LoadWithQueue();

  ///
  /// @ingroup ge
  /// @brief ACL, Bind Data Op addr to input queue.
  /// @return: 0 for success / others for fail
  ///
  Status BindInputQueue();

  Status CpuTaskModelZeroCopy(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs);

  ///
  /// @ingroup ge
  /// @brief ACL, Bind NetOutput Op addr to output queue.
  /// @return: 0 for success / others for fail
  ///
  Status BindOutputQueue();

  Status CpuModelPrepareOutput(uintptr_t addr, uint32_t size);

  ///
  /// @ingroup ge
  /// @brief definiteness queue schedule, bind input queue to task.
  /// @param [in] queue_id: input queue id from user.
  /// @return: 0 for success / others for fail
  ///
  Status CpuModelDequeue(uint32_t queue_id);

  ///
  /// @ingroup ge
  /// @brief definiteness queue schedule, bind output queue to task.
  /// @param [in] queue_id: output queue id from user.
  /// @param [in] addr: NetOutput Op input tensor address.
  /// @param [in] size: NetOutput Op input tensor size.
  /// @return: 0 for success / others for fail
  ///
  Status CpuModelEnqueue(uint32_t queue_id, uintptr_t addr, uint32_t size);

  ///
  /// @ingroup ge
  /// @brief definiteness queue schedule, active original model stream.
  /// @return: 0 for success / others for fail
  ///
  Status CpuActiveStream();

  ///
  /// @ingroup ge
  /// @brief definiteness queue schedule, wait for end graph.
  /// @return: 0 for success / others for fail
  ///
  Status CpuWaitEndGraph();

  Status BindEnqueue();

  Status CpuModelEnqueue(uint32_t queue_id, uintptr_t out_mbuf);
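
  ///
  /// Descriptive note (an assumption inferred from the queue-schedule declarations above, not a
  /// guaranteed contract): LoadWithQueue is assumed to assemble the CPU task chain roughly as
  ///   CpuModelDequeue -> CpuTaskModelZeroCopy -> CpuActiveStream -> CpuWaitEndGraph
  ///   -> CpuModelPrepareOutput / CpuModelEnqueue -> CpuModelRepeat
  /// so that one dequeue/enqueue round trip drives one model execution.
  ///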
  ///
  /// @ingroup ge
  /// @brief definiteness queue schedule, repeat run model.
  /// @return: 0 for success / others for fail
  ///
  Status CpuModelRepeat();

  Status InitEntryTask();

  Status AddHeadStream();

  ///
  /// @ingroup ge
  /// @brief set ts device.
  /// @return: 0 for success / others for fail
  ///
  Status SetTSDevice();

  Status OpDebugRegister();

  void OpDebugUnRegister();

  void CheckHasHcomOp();

  Status DoTaskSink();

  void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);

  Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);

  // get desc info of graph for profiling
  Status GetComputeGraphInfo(vector<ComputeGraphDescInfo> &graph_desc_info);

  void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);

  Status InitModelProfile();

  Status SinkModelProfile();

  Status SinkTimeProfile(const InputData &current_data);

  Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
                             std::vector<ge::OutputTensorInfo> &outputs);

  void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);

  void SetLabelForDynamic(const NodePtr &node);

  void ParseDynamicOutShape(const std::vector<std::string> &str_info, std::vector<vector<int64_t>> &vec_info);

  bool IsGetNextSinkDynamic(const OpDescPtr &op_desc);

  void GetAllGearsInfo(const NodePtr &node);

  Status GetGetDynamicDimsNodeInfo(const NodePtr &node);

  Status GetGearAndRealOutSizeInfo(size_t input_count, const NodePtr &node);

  Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);

  Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);

  bool is_weight_mem_has_inited_;
  bool is_feature_map_mem_has_inited_;

  uint32_t model_id_;
  uint32_t runtime_model_id_;
  string name_;

  // used for inference data dump
  string om_name_;

  uint32_t version_;
  GeModelPtr ge_model_;

  bool need_destroy_aicpu_kernel_{false};
  vector<std::string> out_node_name_;

  map<uint32_t, OpDescPtr> op_list_;

  // data op_desc
  vector<OpDescPtr> data_op_list_;

  vector<OpDescPtr> output_op_list_;

  vector<OpDescPtr> variable_op_list_;

  std::map<uint32_t, ZeroCopyOffset> new_input_data_info_;
  std::map<uint32_t, ZeroCopyOffset> new_output_data_info_;
  std::map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
  std::map<const void *, ZeroCopyOffset> new_output_outside_addrs_;

  std::set<const void *> real_virtual_addrs_;

  // output op: save cce op actual needed memory size
  vector<int64_t> output_memory_size_list_;

  std::thread thread_id_;

  std::shared_ptr<ModelListener> listener_;

  bool run_flg_;

  std::mutex mux_run_flg_;

  int32_t priority_;

  vector<rtStream_t> stream_list_;

  std::mutex all_hccl_stream_list_mutex_;
  vector<rtStream_t> all_hccl_stream_list_;

  // for reuse hccl_follow_stream
  std::mutex capacity_of_stream_mutex_;
  std::map<int64_t, std::vector<rtStream_t>> main_follow_stream_mapping_;

  vector<rtEvent_t> event_list_;

  vector<rtLabel_t> label_list_;
  set<uint32_t> label_id_indication_;

  std::mutex outside_addrs_mutex_;
  std::vector<ZeroCopyTask> zero_copy_tasks_;  // Task used Data or NetOutput addr.
  std::set<const void *> copy_only_addrs_;     // Address need copy to original place.

  std::vector<TaskInfoPtr> task_list_;
  // rt_model_handle
  rtModel_t rt_model_handle_;

  rtStream_t rt_model_stream_;

  bool is_inner_model_stream_;

  bool is_async_mode_;  // For NN execute, Async mode use rtMemcpyAsync on rt_model_stream_.
  ExecuteMode last_execute_mode_;

  bool is_stream_list_bind_{false};
  bool is_pure_head_stream_{false};
  rtStream_t rt_head_stream_{nullptr};
  rtStream_t rt_entry_stream_{nullptr};
  rtAicpuDeployType_t deploy_type_{AICPU_DEPLOY_RESERVED};

  // ACL queue schedule, save queue ids for Init.
  std::vector cpu_task_list_;
  std::vector<uint32_t> input_queue_ids_;    // input queue ids created by caller.
  std::vector<uint32_t> output_queue_ids_;   // output queue ids created by caller.
  std::vector<uintptr_t> input_mbuf_list_;   // input mbuf created by dequeue task.
  std::vector<uintptr_t> output_mbuf_list_;  // output mbuf created by dequeue task.
  uint64_t session_id_;

  uint32_t device_id_;

  std::mutex flowctrl_op_index_internal_map_mutex_;
  std::map<uint32_t, uint32_t> flowctrl_op_index_internal_map_;

  std::vector<rtStream_t> active_stream_list_;
  std::set<uint32_t> active_stream_indication_;

  std::set<uint32_t> hcom_streams_;
  RuntimeParam runtime_param_;

  static std::mutex tvm_bin_mutex_;
  std::set<std::string> tvm_bin_kernel_;

  std::map<std::string, uint32_t> used_tbe_handle_map_;

  // for profiling task and graph info
  std::vector<TaskDescInfo> task_desc_info_;

  int64_t maxDumpOpNum_;
  // for data dump
  DataDumper data_dumper_;
  uint64_t iterator_count_;
  bool is_l1_fusion_enable_;
  std::map saved_task_addrs_;
  void *l1_fusion_addr_ = nullptr;

  bool known_node_ = false;
  uint32_t total_args_size_ = 0;
  void *args_ = nullptr;
  void *args_host_ = nullptr;
  void *fixed_addrs_ = nullptr;
  void *hybrid_addrs_ = nullptr;
  uint32_t total_hybrid_args_size_ = 0;
  int64_t total_fixed_addr_size_ = 0;
  std::map knonw_input_data_info_;
  std::map knonw_output_data_info_;
  vector<void *> total_io_addrs_;
  vector<void *> orig_total_io_addrs_;
  bool base_addr_not_changed_ = false;

  vector<vector<int64_t>> batch_info_;
  std::vector<std::vector<int64_t>> combined_batch_info_;
  vector<std::string> user_designate_shape_order_;
  int32_t dynamic_type_ = 0;
  bool is_dynamic_ = false;

  vector<uint64_t> batch_size_;
  // key: input tensor name, generally rts op;
  // value: the fixed addr of input anchor, same as the peer output anchor addr of the peer op
  std::map<string, int64_t> tensor_name_to_fixed_addr_size_;

  // key: input tensor name, generally rts op; value: the peer output anchor of the peer op
  std::map tensor_name_to_peer_output_index_;
  // whether the model is executed for the first time
  bool is_first_execute_;

  // for op debug
  std::mutex debug_reg_mutex_;
  bool is_op_debug_reg_ = false;
  void *op_debug_addr_ = nullptr;
  void *p2p_debug_addr_ = nullptr;
  bool is_new_model_desc_{false};
  bool is_online_infer_dynamic_ = false;
  bool is_getnext_sink_dynamic_ = false;
  std::vector cur_dynamic_dims_;
  void *netoutput_last_input_addr_ = nullptr;
  int64_t netoutput_last_input_size_ = 0;
  size_t shape_of_cur_dynamic_dims_ = 0;
  // key: input_index: input is merge node; value: each gear info and each output size
  std::map, int64_t>> merge_nodes_gear_and_real_out_size_info_;
  // key: input_index: input is merge node; value: each gear info and each output shape
  std::map, vector>> merge_nodes_gear_and_real_out_shape_info_;
  std::vector> all_gears_info_;

  std::multimap op_id_map_;
  std::vector<ProfileInfo> profile_list_;

  // For super kernel.
  SuperKernelTaskInfo skt_info_;
};
}  // namespace ge
#endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_