Merge branch 'development' of gitee.com:dong-duo/graphengine into development

5 years ago · d5c6008198
parent 3a8999a78a 7462692274
commit d5c6008198
76 changed files with 1308 additions and 2190 deletions
--- a/ge/common/ge/op_tiling_manager.cc
+++ b/ge/common/ge/op_tiling_manager.cc
@ -88,4 +88,8 @@ void OpTilingManager::LoadSo() {
  }
 }

+// Returns a reference to the function-local static OpTilingManager, which is
+// constructed the first time this function is called and shared by all callers.
+OpTilingManager &OpTilingManager::GetInstance() {
+  static OpTilingManager instance;
+  return instance;
+}
 }  // namespace ge
--- a/ge/common/ge/op_tiling_manager.h
+++ b/ge/common/ge/op_tiling_manager.h
@ -25,6 +25,7 @@ using SoToHandleMap = std::map<std::string, void *>;
 class OpTilingManager {
 public:
  OpTilingManager() = default;
+  static OpTilingManager &GetInstance();
  ~OpTilingManager();
  void LoadSo();

--- a/ge/executor/CMakeLists.txt
+++ b/ge/executor/CMakeLists.txt
@ -72,7 +72,89 @@ set(SRC_LIST
    "../single_op/task/tbe_task_builder.cc"
    "../single_op/task/aicpu_task_builder.cc"
    "../single_op/task/aicpu_kernel_task_builder.cc"
-    "../hybrid/hybrid_davinci_model_stub.cc"
+    "../hybrid/common/tensor_value.cc"
+    "../hybrid/common/npu_memory_allocator.cc"
+    "../hybrid/executor/rt_callback_manager.cc"
+    "../hybrid/executor/node_state.cc"
+    "../hybrid/executor/node_done_manager.cc"
+    "../hybrid/executor/hybrid_profiler.cc"
+    "../hybrid/executor/hybrid_model_executor.cc"
+    "../hybrid/executor/hybrid_model_async_executor.cc"
+    "../hybrid/executor/hybrid_execution_context.cc"
+    "../hybrid/executor/subgraph_context.cc"
+    "../hybrid/executor/subgraph_executor.cc"
+    "../hybrid/executor/worker/task_compile_engine.cc"
+    "../hybrid/executor/worker/shape_inference_engine.cc"
+    "../hybrid/executor/worker/execution_engine.cc"
+    "../hybrid/model/hybrid_model.cc"
+    "../hybrid/model/hybrid_model_builder.cc"
+    "../hybrid/model/node_item.cc"
+    "../hybrid/model/graph_item.cc"
+    "../hybrid/node_executor/aicore/aicore_node_executor.cc"
+    "../hybrid/node_executor/aicore/aicore_op_task.cc"
+    "../hybrid/node_executor/aicore/aicore_task_builder.cc"
+    "../hybrid/node_executor/aicpu/aicpu_node_executor.cc"
+    "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
+    "../hybrid/node_executor/ge_local/ge_local_node_executor.cc"
+    "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
+    "../hybrid/node_executor/host_cpu/kernel_factory.cc"
+    "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
+    "../hybrid/node_executor/controlop/control_op_executor.cc"
+    "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
+    "../hybrid/node_executor/rts/rts_node_executor.cc"
+    "../hybrid/node_executor/node_executor.cc"
+    "../hybrid/node_executor/task_context.cc"
+    "../hybrid/hybrid_davinci_model.cc"
+    "../ge_local_engine/engine/host_cpu_engine.cc"
+    "../graph/common/omg_util.cc"
+    "../graph/manager/host_mem_manager.cc"
+    "../graph/build/memory/var_mem_assign_util.cc"
+    "../host_kernels/transpose_kernel.cc"
+    "../host_kernels/add_kernel.cc"
+    "../host_kernels/broadcast_args_kernel.cc"
+    "../host_kernels/broadcast_gradient_args_kernel.cc"
+    "../host_kernels/cast_kernel.cc"
+    "../host_kernels/concat_offset_kernel.cc"
+    "../host_kernels/concat_v2_kernel.cc"
+    "../host_kernels/dynamic_stitch_kernel.cc"
+    "../host_kernels/identity_kernel.cc"
+    "../host_kernels/empty_kernel.cc"
+    "../host_kernels/expanddims_kernel.cc"
+    "../host_kernels/fill_kernel.cc"
+    "../host_kernels/floordiv_kernel.cc"
+    "../host_kernels/floormod_kernel.cc"
+    "../host_kernels/gather_v2_kernel.cc"
+    "../host_kernels/greater_kernel.cc"
+    "../host_kernels/kernel_utils.cc"
+    "../host_kernels/maximum_kernel.cc"
+    "../host_kernels/mul_kernel.cc"
+    "../host_kernels/pack_kernel.cc"
+    "../host_kernels/permute_kernel.cc"
+    "../host_kernels/range_kernel.cc"
+    "../host_kernels/rank_kernel.cc"
+    "../host_kernels/reduce_prod_kernel.cc"
+    "../host_kernels/reshape_kernel.cc"
+    "../host_kernels/rsqrt_kernel.cc"
+    "../host_kernels/shape_kernel.cc"
+    "../host_kernels/shape_n_kernel.cc"
+    "../host_kernels/size_kernel.cc"
+    "../host_kernels/slice_d_kernel.cc"
+    "../host_kernels/slice_kernel.cc"
+    "../host_kernels/squeeze_kernel.cc"
+    "../host_kernels/unsqueeze_kernel.cc"
+    "../host_kernels/ssd_prior_box_kernel.cc"
+    "../host_kernels/strided_slice_kernel.cc"
+    "../host_kernels/sub_kernel.cc"
+    "../host_kernels/transdata_kernel.cc"
+    "../host_kernels/unpack_kernel.cc"
+    "../graph/passes/pass_utils.cc"
+    "../graph/common/bcast.cc"
+    "../common/fp16_t.cc"
+    "../common/formats/format_transfers/format_transfer_transpose.cc"
+    "../common/formats/utils/formats_trans_utils.cc"
 )

 ######## libge_executor.a ########
@ -105,9 +187,9 @@ target_include_directories(ge_executor PRIVATE
    ${CMAKE_BINARY_DIR}/proto/ge
    #### yellow zone ####
    ${GE_CODE_DIR}/../inc
-    ${GE_CODE_DIR}/../inc/cce   
+    ${GE_CODE_DIR}/../inc/cce
    #### blue zone ####
-    ${GE_CODE_DIR}/third_party/fwkacllib/inc 
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_libraries(ge_executor PRIVATE
@ -147,9 +229,9 @@ target_include_directories(ge_executor_shared PRIVATE
    ${CMAKE_BINARY_DIR}/proto/ge
    #### yellow zone ####
    ${GE_CODE_DIR}/../inc
-    ${GE_CODE_DIR}/../inc/cce   
+    ${GE_CODE_DIR}/../inc/cce
    #### blue zone ####
-    ${GE_CODE_DIR}/third_party/fwkacllib/inc 
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )

 target_link_libraries(ge_executor_shared PRIVATE
@ -158,7 +240,7 @@ target_link_libraries(ge_executor_shared PRIVATE
    -Wl,--no-as-needed
    ge_common
    runtime
-    slog 
+    slog
    mmpa
    graph
    register
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@ -39,6 +39,8 @@
 #include "graph/manager/graph_var_manager.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
+#include "graph/opsproto_manager.h"
+#include "ge_local_engine/engine/host_cpu_engine.h"

 using std::string;
 using std::vector;
@ -221,6 +223,33 @@ class ModelListenerAdapter : public ModelListener {
  std::shared_ptr<ge::ModelListener> listener;
 };

+// Builds the "custom:built-in" op-proto search path and hands it to
+// OpsProtoManager under the "ge.opsProtoLibPath" option.
+// The root directory comes from ASCEND_OPP_PATH when that variable is set;
+// otherwise it is derived from PluginManager::GetPath(). If the variable is set
+// but RealPath() cannot resolve it, an error is logged and nothing is initialized.
+static void InitOpsProtoManger() {
+  string proto_lib_dirs;
+  const char *opp_env = std::getenv("ASCEND_OPP_PATH");
+  if (opp_env == nullptr) {
+    // No environment override: strip the last two path components of the
+    // plugin path (keeping the trailing '/') and look under "ops/" there.
+    string base_dir = PluginManager::GetPath();
+    GELOGI("path_base is %s", base_dir.c_str());
+    base_dir = base_dir.substr(0, base_dir.rfind('/'));
+    base_dir = base_dir.substr(0, base_dir.rfind('/') + 1);
+    proto_lib_dirs = base_dir + "ops/op_proto/custom/" + ":" + base_dir + "ops/op_proto/built-in/";
+  } else {
+    const string opp_root = opp_env;
+    if (RealPath(opp_root.c_str()).empty()) {
+      GELOGE(FAILED, "File path %s is invalid.", opp_root.c_str());
+      return;
+    }
+    proto_lib_dirs = opp_root + "/op_proto/custom/" + ":" + opp_root + "/op_proto/built-in/";
+    GELOGI("Get opsproto so path from env : %s", opp_root.c_str());
+  }
+
+  GELOGI("Get opsproto path is %s", proto_lib_dirs.c_str());
+  map<string, string> proto_options;
+  proto_options.emplace(string("ge.opsProtoLibPath"), proto_lib_dirs);
+  // The result of Initialize() is intentionally ignored, as in the original.
+  (void)OpsProtoManager::Instance()->Initialize(proto_options);
+}
+
 GeExecutor::GeExecutor() {}

 Status GeExecutor::Initialize() {
@ -230,6 +259,16 @@ Status GeExecutor::Initialize() {
    return ge::SUCCESS;
  }

+  OpTilingManager::GetInstance().LoadSo();
+
+  Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
+  if (initHostCpuEngineStatus != SUCCESS) {
+    GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
+    return initHostCpuEngineStatus;
+  }
+
+  InitOpsProtoManger();
+
  std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
  mem_type.push_back(RT_MEMORY_P2P_DDR);
  auto ret = MemManager::Instance().Initialize(mem_type);
@ -600,10 +639,16 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
    return ACL_ERROR_GE_INTERNAL_ERROR;
  }

-  std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
-  if (davinci_model != nullptr) {
-    uint64_t session_id = davinci_model->GetSessionId();
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    uint64_t session_id = hybrid_davinci_model->GetSessionId();
    VarManagerPool::Instance().RemoveVarManager(session_id);
+  } else {
+    std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
+    if (davinci_model != nullptr) {
+      uint64_t session_id = davinci_model->GetSessionId();
+      VarManagerPool::Instance().RemoveVarManager(session_id);
+    }
  }
  ret = GraphLoader::UnloadModel(model_id);
  if (ret != SUCCESS) {
@ -933,6 +978,26 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
 */
 Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
                             ge::RunModelData &run_output_data, bool async_mode) {
+  // This overload carries no tensor descriptions: forward to the extended overload
+  // with an empty input description list and a local output list that is discarded.
+  const std::vector<GeTensorDesc> no_input_desc;
+  std::vector<GeTensorDesc> unused_output_desc;
+  return ExecModel(model_id, stream, run_input_data, no_input_desc, run_output_data, unused_output_desc, async_mode);
+}
+
+/**
+* @ingroup ge
+* @brief Synchronous execution of offline model(Do not create thread)
+* @param [in] uint32_t model_id: Model ID to execute
+              void* stream: stream to execute
+              const ge::RunModelData &run_input_data: Model input data
+              const std::vector<GeTensorDesc> &input_desc: Description of model input data
+              bool async_mode: whether to execute asynchronously
+* @param [out] ge::RunModelData &run_output_data: Model output data
+* @param [out] std::vector<GeTensorDesc> &output_desc: Description of model output data
+* @return SUCCESS handle successfully / others handle failed
+*/
+Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
+                             const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
+                             std::vector<GeTensorDesc> &output_desc, bool async_mode) {
  if (!isInit_) {
    GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
    return ACL_ERROR_GE_EXEC_NOT_INIT;
@ -957,7 +1022,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
    }
  }

-  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data);
+  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
 }

 /**
--- a/ge/executor/module.mk
+++ b/ge/executor/module.mk
@ -61,9 +61,91 @@ local_ge_executor_src_files :=  \
    ../single_op/task/tbe_task_builder.cc \
    ../single_op/task/aicpu_task_builder.cc \
    ../single_op/task/aicpu_kernel_task_builder.cc \
-    ../hybrid/hybrid_davinci_model_stub.cc\
    ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \
    ../graph/common/local_context.cc \
+    ../hybrid/common/tensor_value.cc                                        \
+    ../hybrid/common/npu_memory_allocator.cc                                \
+    ../hybrid/executor/rt_callback_manager.cc                               \
+    ../hybrid/executor/node_state.cc                                        \
+    ../hybrid/executor/node_done_manager.cc                                 \
+    ../hybrid/executor/hybrid_profiler.cc                                   \
+    ../hybrid/executor/hybrid_model_executor.cc                             \
+    ../hybrid/executor/hybrid_model_async_executor.cc                       \
+    ../hybrid/executor/hybrid_execution_context.cc                          \
+    ../hybrid/executor/subgraph_context.cc                                  \
+    ../hybrid/executor/subgraph_executor.cc                                 \
+    ../hybrid/executor/worker/task_compile_engine.cc                        \
+    ../hybrid/executor/worker/shape_inference_engine.cc                     \
+    ../hybrid/executor/worker/execution_engine.cc                           \
+    ../hybrid/model/hybrid_model.cc                                         \
+    ../hybrid/model/hybrid_model_builder.cc                                 \
+    ../hybrid/model/node_item.cc                                            \
+    ../hybrid/model/graph_item.cc                                           \
+    ../hybrid/node_executor/aicore/aicore_node_executor.cc                  \
+    ../hybrid/node_executor/aicore/aicore_op_task.cc                        \
+    ../hybrid/node_executor/aicore/aicore_task_builder.cc                   \
+    ../hybrid/node_executor/aicpu/aicpu_node_executor.cc                    \
+    ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc         \
+    ../hybrid/node_executor/ge_local/ge_local_node_executor.cc              \
+    ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc              \
+    ../hybrid/node_executor/host_cpu/kernel_factory.cc                      \
+    ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc                 \
+    ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc              \
+    ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc                \
+    ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc        \
+    ../hybrid/node_executor/controlop/control_op_executor.cc                \
+    ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
+    ../hybrid/node_executor/rts/rts_node_executor.cc                        \
+    ../hybrid/node_executor/node_executor.cc                                \
+    ../hybrid/node_executor/task_context.cc                                 \
+    ../hybrid/hybrid_davinci_model.cc                                       \
+    ../ge_local_engine/engine/host_cpu_engine.cc \
+    ../graph/common/omg_util.cc \
+    ../graph/manager/host_mem_manager.cc \
+    ../graph/build/memory/var_mem_assign_util.cc \
+    ../host_kernels/transpose_kernel.cc \
+    ../host_kernels/add_kernel.cc \
+    ../host_kernels/broadcast_args_kernel.cc \
+    ../host_kernels/broadcast_gradient_args_kernel.cc \
+    ../host_kernels/cast_kernel.cc \
+    ../host_kernels/concat_offset_kernel.cc \
+    ../host_kernels/concat_v2_kernel.cc \
+    ../host_kernels/dynamic_stitch_kernel.cc \
+    ../host_kernels/identity_kernel.cc \
+    ../host_kernels/empty_kernel.cc \
+    ../host_kernels/expanddims_kernel.cc \
+    ../host_kernels/fill_kernel.cc \
+    ../host_kernels/floordiv_kernel.cc \
+    ../host_kernels/floormod_kernel.cc \
+    ../host_kernels/gather_v2_kernel.cc  \
+    ../host_kernels/greater_kernel.cc \
+    ../host_kernels/kernel_utils.cc \
+    ../host_kernels/maximum_kernel.cc \
+    ../host_kernels/mul_kernel.cc \
+    ../host_kernels/pack_kernel.cc \
+    ../host_kernels/permute_kernel.cc \
+    ../host_kernels/range_kernel.cc \
+    ../host_kernels/rank_kernel.cc \
+    ../host_kernels/reduce_prod_kernel.cc \
+    ../host_kernels/reshape_kernel.cc \
+    ../host_kernels/rsqrt_kernel.cc \
+    ../host_kernels/shape_kernel.cc \
+    ../host_kernels/shape_n_kernel.cc \
+    ../host_kernels/size_kernel.cc \
+    ../host_kernels/slice_d_kernel.cc \
+    ../host_kernels/slice_kernel.cc \
+    ../host_kernels/squeeze_kernel.cc \
+    ../host_kernels/unsqueeze_kernel.cc \
+    ../host_kernels/ssd_prior_box_kernel.cc \
+    ../host_kernels/strided_slice_kernel.cc \
+    ../host_kernels/sub_kernel.cc \
+    ../host_kernels/transdata_kernel.cc \
+    ../host_kernels/unpack_kernel.cc \
+    ../graph/passes/pass_utils.cc \
+    ../graph/common/bcast.cc \
+    ../common/fp16_t.cc \
+    ../common/formats/format_transfers/format_transfer_transpose.cc \
+    ../common/formats/utils/formats_trans_utils.cc \

 local_ge_executor_c_include :=             \
    proto/insert_op.proto                  \
--- a/ge/ge_local_engine/CMakeLists.txt
+++ b/ge/ge_local_engine/CMakeLists.txt
@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
 )

 ############ libge_local_opskernel_builder.a ############
-add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
+add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})

 target_compile_options(ge_local_opskernel_builder_static PRIVATE
    -Werror
--- a/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/ge/ge_local_engine/engine/host_cpu_engine.cc
@ -95,8 +95,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {

 void HostCpuEngine::CloseSo() {
  for (auto handle : lib_handles_) {
-    if (dlclose(handle) != 0) {
-      GELOGW("failed to close handle, message: %s", dlerror());
+    if (mmDlclose(handle) != 0) {
+      GELOGW("failed to close handle, message: %s", mmDlerror());
    }
  }
  lib_handles_.clear();
@ -322,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector<std::string> &lib_paths) {

 Status HostCpuEngine::LoadLib(const std::string &lib_path) {
  GELOGI("To invoke dlopen on lib: %s", lib_path.c_str());
-  auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
+  auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
  if (handle == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror());
+    GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), mmDlerror());
    return INTERNAL_ERROR;
  }

-  auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize");
+  auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize");
  if (initialize != nullptr) {
    GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str());
    if (initialize(HostCpuContext()) != SUCCESS) {
--- a/ge/ge_local_engine/engine/host_cpu_engine.h
+++ b/ge/ge_local_engine/engine/host_cpu_engine.h
@ -20,7 +20,7 @@
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/node.h"
 #include "graph/operator.h"
-#include "register/register.h"
+#include "external/../register/register.h"

 namespace ge {
 class HostCpuEngine {
--- a/ge/ge_runtime/CMakeLists.txt
+++ b/ge/ge_runtime/CMakeLists.txt
@ -13,6 +13,9 @@ set(GE_SRC_LIST
    "task/hccl_task.cc"
    "task/memcpy_async_task.cc"
    "task/profiler_task.cc"
+    "task/label_goto_task.cc"
+    "task/label_set_task.cc"
+    "task/label_switch_task.cc"
 )

 add_library(ge_runtime SHARED ${GE_SRC_LIST})
--- a/ge/ge_runtime/runtime_model.cc
+++ b/ge/ge_runtime/runtime_model.cc
@ -307,8 +307,8 @@ bool RuntimeModel::Run() {

  ret = rtStreamSynchronize(rt_model_stream_);
  if (ret != RT_ERROR_NONE) {
-    if (ret == RT_ERROR_END_OF_SEQUENCE) {
-      GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret);
+    if (ret == ACL_ERROR_RT_END_OF_SEQUENCE) {
+      GELOGI("Model stream ACL_ERROR_RT_END_OF_SEQUENCE signal received, ret = 0x%X", ret);
      return true;
    }
    GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret);
--- a/ge/ge_runtime/task/task.h
+++ b/ge/ge_runtime/task/task.h
@ -24,6 +24,7 @@
 #include "runtime/rt_model.h"
 #include "ge_runtime/model_context.h"
 #include "ge_runtime/task_info.h"
+#include "external/runtime/rt_error_codes.h"

 namespace ge {
 namespace model_runner {
--- a/ge/graph/build/graph_builder.cc
+++ b/ge/graph/build/graph_builder.cc
@ -30,6 +30,7 @@
 #include "model/ge_model.h"
 #include "graph/ge_context.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
+#include "graph/utils/op_desc_utils.h"

 using domi::BuildMode;

@ -311,6 +312,53 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
  return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
 }

+// Creates a MEMCPYADDRASYNC op named `name` and splices it between `out_anchor`
+// and the given `in_anchors` inside `graph`.
+//   graph:      graph that receives the new node.
+//   out_anchor: producer anchor the new node is inserted after.
+//   in_anchors: consumer anchors to be fed by the new node.
+//   name:       name of the new op.
+// Returns SUCCESS on success, FAILED when the graph rewiring fails, and the
+// status produced by GE_CHECK_NOTNULL when any required object is null.
+static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
+                               const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
+  GE_CHECK_NOTNULL(graph);
+  GE_CHECK_NOTNULL(out_anchor);
+  NodePtr in_node = out_anchor->GetOwnerNode();
+  GE_CHECK_NOTNULL(in_node);
+  OpDescPtr in_op_desc = in_node->GetOpDesc();
+  GE_CHECK_NOTNULL(in_op_desc);
+
+  // Describe the copy with the tensor produced on `out_anchor` itself rather than
+  // always output 0, so the helper stays correct for multi-output producers.
+  const auto src_desc = in_op_desc->GetOutputDesc(static_cast<uint32_t>(out_anchor->GetIdx()));
+  OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
+  OpDescPtr op_desc = op_desc_builder.AddInput("x", src_desc).AddOutput("y", src_desc).Build();
+  GE_CHECK_NOTNULL(op_desc);
+  (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
+
+  NodePtr memcpy_node = graph->AddNode(op_desc);
+  GE_CHECK_NOTNULL(memcpy_node);
+  if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, memcpy_node) != GRAPH_SUCCESS) {
+    GELOGE(FAILED, "Insert MemcpyAddrAsync node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+// For every NETOUTPUT node directly in `graph`, inserts a memcpy node on each
+// input that is fed straight from a CONSTANT node. CONSTANT does not generate a
+// task by itself, so the inserted node gives the value a task to be produced by.
+// Returns SUCCESS, or FAILED as soon as one insertion fails.
+static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
+  for (auto &node : graph->GetDirectNode()) {
+    auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr || op_desc->GetType() != NETOUTPUT) {
+      continue;
+    }
+
+    for (InDataAnchorPtr &in_anchor : node->GetAllInDataAnchors()) {
+      const OutDataAnchorPtr &src_anchor = in_anchor->GetPeerOutAnchor();
+      if (src_anchor == nullptr) {
+        continue;
+      }
+      NodePtr src_node = src_anchor->GetOwnerNode();
+      GE_CHECK_NOTNULL(src_node);
+      if (src_node->GetType() != CONSTANT) {
+        continue;
+      }
+
+      GELOGD("Insert MemcpyAsync node between %s and %s.", src_node->GetName().c_str(), node->GetName().c_str());
+      const std::string memcpy_name = node->GetName() + "_input_" + std::to_string(in_anchor->GetIdx()) + "_Memcpy";
+      if (InsertMemcpyNode(graph, src_anchor, {in_anchor}, memcpy_name) != SUCCESS) {
+        GELOGE(FAILED, "Insert memcpy between %s and %s failed.", src_node->GetName().c_str(),
+               node->GetName().c_str());
+        return FAILED;
+      }
+    }
+  }
+  return SUCCESS;
+}
+
 Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
                                               std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                                               GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
@ -332,6 +380,9 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
    if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
      continue;
    }
+
+    GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");
+
    if (sub_graph->GetGraphUnknownFlag()) {
      // unknown shape build flow
      GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
--- a/ge/graph/load/graph_loader.cc
+++ b/ge/graph/load/graph_loader.cc
@ -274,13 +274,16 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
 /// @param [in] stream   stream to execute model on
 /// @param [in] async_mode  is asynchronize mode.
 /// @param [in] input_data  model input data
+/// @param [in] input_desc  description of model input data
 /// @param [out] output_data  model output data
+/// @param [out] output_desc  description of model output data
 ///
 Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                                 OutputData &output_data) {
+                                 const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                                 std::vector<GeTensorDesc> &output_desc) {
  auto model_manager = ModelManager::GetInstance();
  GE_CHECK_NOTNULL(model_manager);
-  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data);
+  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
  if (ret != SUCCESS) {
    GELOGE(ret, "Execute model failed, model_id:%u.", model_id);
    return ret;
--- a/ge/graph/load/graph_loader.h
+++ b/ge/graph/load/graph_loader.h
@ -65,7 +65,8 @@ class GraphLoader {
                               const std::vector<uint32_t> &output_queue_ids);

  static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                             OutputData &output_data);
+                             const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                             std::vector<GeTensorDesc> &output_desc);

  static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);

--- a/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/ge/graph/load/new_model_manager/data_dumper.cc
@ -919,11 +919,11 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio
      ReplaceStringElem(op_name);
      ReplaceStringElem(op_type);
      string dump_file_path =
-          "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time);
+          "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
      GELOGI("The exception dump file path is %s", dump_file_path.c_str());

      uint64_t proto_size = dump_data.ByteSizeLong();
-      unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
+      std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
      bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
      if (!ret || proto_size == 0) {
        GELOGE(PARAM_INVALID, "Dump data proto serialize failed");
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@ -117,7 +117,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
      load_end_time_(0),
      time_info_(),
      dataInputTid(0),
-      is_model_has_inited_(false),
+      is_weight_mem_has_inited_(false),
+      is_feature_map_mem_has_inited_(false),
      model_id_(0),
      runtime_model_id_(0),
      version_(0),
@ -263,34 +264,65 @@ void DavinciModel::Shrink() {
  ge_model_.reset();  // delete object.
 }

-Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
-  if (is_model_has_inited_) {
-    GELOGE(FAILED, "call InitModelMem more than once .");
+// Sets up the weight buffer of this model and publishes it in runtime_param_.weight_base.
+//   dev_ptr:     used as the weight base only when the model's weight buffer is empty.
+//   weight_ptr:  optional caller-provided weight buffer; when it is null and the model
+//                has weights, a buffer is obtained from MallocWeightsMem().
+//   weight_size: compared against the model's weight size when weight_ptr is non-null;
+//                a smaller value is rejected.
+// Returns FAILED on a repeated call or an undersized weight_ptr buffer,
+// GE_EXEC_ALLOC_WEIGHT_MEM_FAILED when allocation fails, SUCCESS otherwise.
+// GE_CHECK_LE and GE_CHK_RT_RET may also return early with their own status.
+Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
+  if (is_weight_mem_has_inited_) {
+    GELOGE(FAILED, "call InitWeightMem more than once.");
    return FAILED;
  }
-  is_model_has_inited_ = true;
+  // NOTE(review): the flag is set before any validation, so a call that fails
+  // below still makes every later call return FAILED.
+  is_weight_mem_has_inited_ = true;

-  std::size_t data_size = TotalMemSize();
-  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
  const Buffer &weights = ge_model_->GetWeight();
  std::size_t weights_size = weights.GetSize();
  GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE);

-  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
-    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
+  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
+    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
    return FAILED;
  }

-  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
-    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
+  // Default: the weight base aliases dev_ptr. This is overridden below whenever
+  // the model actually carries weights.
+  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
+  is_inner_weight_base_ = false;
+
+  if (weights_size != 0) {
+    weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
+    is_inner_weight_base_ = false;
+    // No caller-provided buffer: allocate one via MallocWeightsMem() and record
+    // that through is_inner_weight_base_.
+    if (weight_ptr == nullptr) {
+      weights_mem_base_ = MallocWeightsMem(weights_size);
+      if (weights_mem_base_ == nullptr) {
+        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
+        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
+      }
+      is_inner_weight_base_ = true;
+    }
+    GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+           weights_mem_base_, weights_size);
+    // Copy the model's weight data from host memory into the selected buffer.
+    GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
+    GELOGI("copy weights data to device");
+  }
+
+  runtime_param_.weight_base = weights_mem_base_;
+  return SUCCESS;
+}
+
+
+Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
+  if (is_feature_map_mem_has_inited_) {
+    GELOGE(FAILED, "call InitFeatureMapMem more than once .");
+    return FAILED;
+  }
+  is_feature_map_mem_has_inited_ = true;
+
+  std::size_t data_size = TotalMemSize();
+  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
+
+  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
+    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
    return FAILED;
  }

  mem_base_ = static_cast<uint8_t *>(dev_ptr);
  p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr);
-  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
  is_inner_mem_base_ = false;
-  is_inner_weight_base_ = false;

  if (TotalMemSize() && mem_base_ == nullptr) {
    mem_base_ = MallocFeatureMapMem(data_size);
@ -298,12 +330,14 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
      GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
      return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
    }
-    GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+    GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
            mem_base_, data_size);
-    weights_mem_base_ = mem_base_;

+    if (!is_inner_weight_base_) {
+      weights_mem_base_ = mem_base_;
+      is_inner_weight_base_ = true;
+    }
    is_inner_mem_base_ = true;
-    is_inner_weight_base_ = true;
  }

  if (p2p_data_size != 0) {
@ -312,27 +346,11 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
      GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
      return GE_EXEC_ALLOC_P2P_MEM_FAILED;
    }
-    GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+    GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
           p2p_mem_base_, p2p_data_size);
    is_inner_p2p_mem_base_ = true;
  }

-  if (weights_size != 0) {
-    weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
-    is_inner_weight_base_ = false;
-    if (weight_ptr == nullptr) {
-      weights_mem_base_ = MallocWeightsMem(weights_size);
-      if (weights_mem_base_ == nullptr) {
-        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
-        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
-      }
-      is_inner_weight_base_ = true;
-    }
-    GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
-           weights_mem_base_, weights_size);
-    GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
-  }
-
  GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
  runtime_param_.mem_base = mem_base_;
  runtime_param_.weight_base = weights_mem_base_;
@ -642,8 +660,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size

  GE_TIMESTAMP_START(InitModelMem);
  GELOGD("Known node is %d", known_node_);
+  GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size));
  if (!known_node_) {
-    GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size));
+    GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
    data_inputer_ = new (std::nothrow) DataInputer();
    GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
  }
@ -1140,6 +1159,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
    GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
                    GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;);
  }
+
  return SUCCESS;
 }

@ -2780,7 +2800,7 @@ void *DavinciModel::Run(DavinciModel *model) {
                                      reinterpret_cast<int64_t *>(shape_data_buffer_data) +
                                      shape_data_buffer_length / sizeof(int64_t));
      GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str());
-      delete[] (int64_t *)current_data.blobs.back().data;
+      delete[] reinterpret_cast<int64_t *>(current_data.blobs.back().data);
      current_data.blobs.pop_back();
    }
    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END));
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@ -584,7 +584,8 @@ class DavinciModel {

  Status SyncVarData();

-  Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize);
+  Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
+  Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);

  void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);

@ -850,7 +851,9 @@ class DavinciModel {
  Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);
  Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);

-  bool is_model_has_inited_;
+  bool is_weight_mem_has_inited_;
+  bool is_feature_map_mem_has_inited_;
+
  uint32_t model_id_;
  uint32_t runtime_model_id_;
  string name_;
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@ -31,6 +31,7 @@
 #include "model/ge_root_model.h"
 #include "graph/common/local_context.h"
 #include "common/formats/utils/formats_trans_utils.h"
+#include "hybrid/hybrid_davinci_model.h"

 namespace ge {
 thread_local uint32_t device_count = 0;
@ -204,6 +205,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {

 ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
  std::lock_guard<std::mutex> lock(map_mutex_);
+  auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
+  if (hybrid_davinci_model != hybrid_model_map_.end()) {
+    uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
+    DestroyAicpuSession(session_id);
+    return SUCCESS;
+  }
+
  auto it = model_map_.find(model_id);
  if (it == model_map_.end()) {
    GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
@ -216,7 +224,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {

 ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
  GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
-  std::lock_guard<std::mutex> lock(sess_ids_mutex_);
+  std::lock_guard<std::mutex> lock(map_mutex_);
  std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
  if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
    Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
@ -229,7 +237,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
 }

 ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
-  std::lock_guard<std::mutex> lock(sess_ids_mutex_);
+  std::lock_guard<std::mutex> lock(map_mutex_);
  std::vector<uint64_t> v_aicpu_kernel;
  std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
  if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
@ -925,6 +933,12 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
                                            vector<InputOutputDescInfo> &output_desc,
                                            std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats,
                                            bool new_model_desc) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->SetModelDescVersion(new_model_desc);
+    return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats);
+  }
+
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
                         "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
@ -943,6 +957,11 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
 ///
 Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
                                         int32_t &dynamic_type) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type);
+  }
+
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetDynamicBatchInfo failed, Invalid model id %u!", model_id);
@ -975,6 +994,12 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect
 ///
 Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
                                                std::vector<std::string> &user_input_shape_order) {
+  auto hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order);
+    return SUCCESS;
+  }
+
  auto davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                         "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id)
@ -990,6 +1015,12 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &
 }

 Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info);
+    return SUCCESS;
+  }
+
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHECK_NOTNULL(davinci_model);
  davinci_model->GetModelAttr(dynamic_output_shape_info);
@ -1201,10 +1232,25 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
 /// @param [in] stream   model stream
 /// @param [in] async_mode  is asynchronize mode.
 /// @param [in] input_data  input data
+/// @param [in] input_desc  description of input data
 /// @param [out] output_data  output data
+/// @param [out] output_desc  description of output data
 ///
 Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                                  OutputData &output_data) {
+                                  const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                                  std::vector<GeTensorDesc> &output_desc) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    auto inputs = input_data.blobs;
+    auto outputs = output_data.blobs;
+
+    Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream);
+    if (status == SUCCESS) {
+      GELOGI("Execute model %u success.", model_id);
+    }
+    return status;
+  }
+
  std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
  GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id);

@ -1243,8 +1289,8 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) {
  return SUCCESS;
 }

-Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) {
-  GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str());
+Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) {
+  GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str());
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr());
  if (aicpu_kernel == nullptr) {
@ -1267,18 +1313,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_
    std::map<string, CustAICPUKernelPtr> new_so_name;
    new_so_name.insert({so_name, aicpu_kernel});
    cust_aicpu_so_[resource_id] = new_so_name;
-    GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id);
+    loaded = false;
+    GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id);
    return SUCCESS;
  }
  auto it_so_name = it->second.find(so_name);
  if (it_so_name == it->second.end()) {
    it->second.insert({so_name, aicpu_kernel});
-    GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id);
+    loaded = false;
+    GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id);
+    return SUCCESS;
  }
+  loaded = true;
+  GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str());
  return SUCCESS;
 }

 Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
+  GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str());
  std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
  if (cust_aicpu_so_.size() == 0) return SUCCESS;
  // get current context
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@ -148,10 +148,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
  /// @param [in] stream   model stream
  /// @param [in] async_mode  is asynchronize mode.
  /// @param [in] input_data  model input data
+  /// @param [in] input_desc  description of model input data
  /// @param [out] output_data  model output data
+  /// @param [out] output_desc  description of model output data
  ///
  ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                          OutputData &output_data);
+                          const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                          std::vector<GeTensorDesc> &output_desc);

  ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs);

@ -286,7 +289,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {

  ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);

-  ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name);
+  ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded);

  ge::Status LaunchCustAicpuSo();

--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@ -875,7 +875,9 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
  }

  if (kernel_type_ == ccKernelType::CUST_AI_CPU) {
-    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed");
+    bool loaded = false;
+    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded),
+            "launch cust aicpu so failed");
  }

  // copy args to new host memory
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
@ -41,7 +41,7 @@ class StreamSwitchTaskInfo : public TaskInfo {

  Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
 private:
-  void SetInputAndValuePtr(DavinciModel *davinci_model, const vector<void *> &input_data_addrs);
+  void SetInputAndValuePtr(DavinciModel *davinci_model, const std::vector<void *> &input_data_addrs);
  void *input_ptr_;
  rtCondition_t cond_;
  void *value_ptr_;
@ -49,7 +49,7 @@ class StreamSwitchTaskInfo : public TaskInfo {
  uint32_t true_stream_id_;
  rtSwitchDataType_t data_type_;
  static const uint32_t kInputNum = 2;
-  vector<int64_t> fixed_addr_offset_;
+  std::vector<int64_t> fixed_addr_offset_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
@ -25,10 +25,11 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) {
  const void *args[] = {this->GetNavTablePtr(),
                        reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))};

-  rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM);
-  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return
-                  RT_ERROR_TO_GE_STATUS(rt_ret);)
-  rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE);
+  rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM);
+  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret);
+                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
+  rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), static_cast<void *>(args), sizeof(args),
+                    RT_MEMCPY_HOST_TO_DEVICE);
  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret);
                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
  rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream,
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
@ -87,7 +87,7 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
  }
  GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size());
  const size_t nav_table_len = 2 * stub_func_list.size();
-  std::unique_ptr<uint64_t[]> nav_table(new(std::nothrow) uint64_t[nav_table_len]);
+  std::unique_ptr<uint64_t[]> nav_table(new (std::nothrow) uint64_t[nav_table_len]);
  GE_CHECK_NOTNULL(nav_table);
  uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t);

@ -106,16 +106,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
    nav_table[i * 2 + 1] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i]));
    GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]);
  }
-  rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM);
+  rt_ret = rtMalloc(reinterpret_cast<void **>(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM);
  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret);
                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
-  rt_ret =
-    rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
+  rt_ret = rtMemcpy(reinterpret_cast<void *>(hbm_nav_table_addr), nav_table_size,
+                    reinterpret_cast<void *>(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret);
                  GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);)
  // Create the necessary metadata for the super kernel
-  h = std::unique_ptr<skt::SuperKernel>(
-      new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim));
+  h =
+    std::unique_ptr<skt::SuperKernel>(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim));
  return SUCCESS;
 }
 }  // namespace skt
--- a/ge/graph/load/new_model_manager/task_info/task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/task_info.h
@ -63,8 +63,8 @@ struct RuntimeParam {
 };

 typedef struct FusionOpInfo {
-  vector<string> original_op_names;
-  string op_name;
+  std::vector<std::string> original_op_names;
+  std::string op_name;
  uint32_t op_index;
  uint32_t stream_id;
 } FusionOpInfo;
--- a/ge/graph/load/new_model_manager/zero_copy_task.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_task.cc
@ -131,7 +131,7 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma
      auto dst_addr = static_cast<uint8_t *>(buffer_addr);
      GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p",
             name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr);
-      *(uintptr_t *)(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr);
+      *reinterpret_cast<uintptr_t *>(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr);
      is_updated_ = true;
    }
  }
--- a/Show More
+++ b/Show More