From 0b666e41b1b448701d0ca86dc51b08928c68240f Mon Sep 17 00:00:00 2001
From: unknown <zhaozhixuan2@hisilicon.com>
Date: Sun, 22 Nov 2020 23:22:24 +0800
Subject: [PATCH 1/9] For pkg3 acllib dynamic link.

---
 .../ops_kernel_builder_manager.cc             | 20 +++++++++++--------
 .../ops_kernel_builder_manager.h              |  2 +-
 2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.cc b/ge/opskernel_manager/ops_kernel_builder_manager.cc
index e0001fcd..167be47b 100644
--- a/ge/opskernel_manager/ops_kernel_builder_manager.cc
+++ b/ge/opskernel_manager/ops_kernel_builder_manager.cc
@@ -33,6 +33,8 @@ const std::vector<std::string> kHcclBuilderLibs = {
     "libhvd_opskernel_builder.so",
     "libhcom_gradtune_opskernel_builder.so"
 };
+
+const std::string kAicoreUtilsLib = "libaicore_utils_runtime.so";
 }  // namespace
 OpsKernelBuilderManager::~OpsKernelBuilderManager() {
   // it's OK to call Finalize multiply times
@@ -45,13 +47,11 @@ OpsKernelBuilderManager &OpsKernelBuilderManager::Instance() {
 }
 
 Status OpsKernelBuilderManager::Initialize(const map<std::string, std::string> &options, bool is_train) {
-  if (is_train) {
-    std::string lib_paths;
-    GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths));
-    plugin_manager_.reset(new (std::nothrow)PluginManager());
-    GE_CHECK_NOTNULL(plugin_manager_);
-    GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs");
-  }
+  std::string lib_paths;
+  GE_CHK_STATUS_RET_NOLOG(GetLibPaths(options, lib_paths, is_train));
+  plugin_manager_.reset(new (std::nothrow)PluginManager());
+  GE_CHECK_NOTNULL(plugin_manager_);
+  GE_CHK_STATUS_RET(plugin_manager_->LoadSo(lib_paths), "Failed to load libs");
 
   auto &kernel_builders = OpsKernelBuilderRegistry::GetInstance().GetAll();
   GELOGI("Number of OpBuild = %zu", kernel_builders.size());
@@ -100,7 +100,8 @@ OpsKernelBuilderPtr OpsKernelBuilderManager::GetOpsKernelBuilder(const string &n
   return nullptr;
 }
 
-Status OpsKernelBuilderManager::GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths) {
+Status OpsKernelBuilderManager::GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths,
+                                            bool is_train) {
   GELOGD("Start to execute GetLibPaths");
   std::string path_base = PluginManager::GetPath();
   std::string so_path = "plugin/opskernel/";
@@ -109,6 +110,9 @@ Status OpsKernelBuilderManager::GetLibPaths(const std::map<std::string, std::str
   for (const auto &lib_name : kBasicBuilderLibs) {
     all_lib_paths += (path + lib_name + ":");
   }
+  if (!is_train) {
+    all_lib_paths += (path_base + kAicoreUtilsLib + ":");
+  }
 
   auto iter = options.find(OPTION_EXEC_HCCL_FLAG);
   if (iter == options.end() || iter->second != "0") {
diff --git a/ge/opskernel_manager/ops_kernel_builder_manager.h b/ge/opskernel_manager/ops_kernel_builder_manager.h
index 7a95ddfa..207ebc79 100644
--- a/ge/opskernel_manager/ops_kernel_builder_manager.h
+++ b/ge/opskernel_manager/ops_kernel_builder_manager.h
@@ -48,7 +48,7 @@ class OpsKernelBuilderManager {
 
  private:
   OpsKernelBuilderManager() = default;
-  static Status GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths);
+  static Status GetLibPaths(const std::map<std::string, std::string> &options, std::string &lib_paths, bool is_train);
 
   std::unique_ptr<PluginManager> plugin_manager_;
   std::map<std::string, OpsKernelBuilderPtr> ops_kernel_builders_{};

From 7fe250695305473b22a4b652a70f81ec3101c98d Mon Sep 17 00:00:00 2001
From: wangzhengjun <wangzhengjun3@huawei.com>
Date: Mon, 7 Dec 2020 15:28:01 +0800
Subject: [PATCH 2/9] modify for static check 2

---
 ge/graph/load/new_model_manager/data_dumper.cc                | 4 ++--
 .../new_model_manager/task_info/stream_switch_task_info.h     | 4 ++--
 ge/graph/load/new_model_manager/task_info/task_info.h         | 4 ++--
 ge/hybrid/executor/hybrid_model_executor.cc                   | 2 +-
 ge/hybrid/hybrid_davinci_model.cc                             | 4 ++--
 ge/hybrid/model/hybrid_model_builder.cc                       | 2 +-
 ge/hybrid/node_executor/controlop/control_op_executor.cc      | 2 +-
 inc/framework/common/taskdown_common.h                        | 2 --
 8 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/ge/graph/load/new_model_manager/data_dumper.cc b/ge/graph/load/new_model_manager/data_dumper.cc
index 4534fe73..b331d780 100644
--- a/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/ge/graph/load/new_model_manager/data_dumper.cc
@@ -919,11 +919,11 @@ Status DataDumper::DumpExceptionInfo(const std::vector<rtExceptionInfo> exceptio
       ReplaceStringElem(op_name);
       ReplaceStringElem(op_type);
       string dump_file_path =
-          "./" + op_type + "." + op_name + "." + to_string(op_desc_info.task_id) + "." + to_string(now_time);
+          "./" + op_type + "." + op_name + "." + std::to_string(op_desc_info.task_id) + "." + std::to_string(now_time);
       GELOGI("The exception dump file path is %s", dump_file_path.c_str());
 
       uint64_t proto_size = dump_data.ByteSizeLong();
-      unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
+      std::unique_ptr<char[]> proto_msg(new (std::nothrow) char[proto_size]);
       bool ret = dump_data.SerializeToArray(proto_msg.get(), proto_size);
       if (!ret || proto_size == 0) {
         GELOGE(PARAM_INVALID, "Dump data proto serialize failed");
diff --git a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
index 89642cf8..a72d7de2 100755
--- a/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/stream_switch_task_info.h
@@ -41,7 +41,7 @@ class StreamSwitchTaskInfo : public TaskInfo {
 
   Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
  private:
-  void SetInputAndValuePtr(DavinciModel *davinci_model, const vector<void *> &input_data_addrs);
+  void SetInputAndValuePtr(DavinciModel *davinci_model, const std::vector<void *> &input_data_addrs);
   void *input_ptr_;
   rtCondition_t cond_;
   void *value_ptr_;
@@ -49,7 +49,7 @@ class StreamSwitchTaskInfo : public TaskInfo {
   uint32_t true_stream_id_;
   rtSwitchDataType_t data_type_;
   static const uint32_t kInputNum = 2;
-  vector<int64_t> fixed_addr_offset_;
+  std::vector<int64_t> fixed_addr_offset_;
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_STREAM_SWITCH_TASK_INFO_H_
diff --git a/ge/graph/load/new_model_manager/task_info/task_info.h b/ge/graph/load/new_model_manager/task_info/task_info.h
index fe9c8c37..26f22564 100644
--- a/ge/graph/load/new_model_manager/task_info/task_info.h
+++ b/ge/graph/load/new_model_manager/task_info/task_info.h
@@ -63,8 +63,8 @@ struct RuntimeParam {
 };
 
 typedef struct FusionOpInfo {
-  vector<string> original_op_names;
-  string op_name;
+  std::vector<std::string> original_op_names;
+  std::string op_name;
   uint32_t op_index;
   uint32_t stream_id;
 } FusionOpInfo;
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index 4af34451..8ba687c2 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -82,7 +82,7 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
 Status HybridModelExecutor::Cleanup() {
   GELOGD("Start to cleanup.");
   context_.callback_manager->Destroy();
-  RuntimeInferenceContext::DestroyContext(to_string(context_.session_id));
+  RuntimeInferenceContext::DestroyContext(std::to_string(context_.session_id));
   GELOGD("Cleanup successfully.");
   return SUCCESS;
 }
diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc
index d696adf9..b6f5bb84 100755
--- a/ge/hybrid/hybrid_davinci_model.cc
+++ b/ge/hybrid/hybrid_davinci_model.cc
@@ -78,8 +78,8 @@ HybridDavinciModel::~HybridDavinciModel() {
   delete impl_;
 }
 
-unique_ptr<HybridDavinciModel> HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) {
-  auto instance = unique_ptr<HybridDavinciModel>(new (std::nothrow)HybridDavinciModel());
+std::unique_ptr<HybridDavinciModel> HybridDavinciModel::Create(const GeRootModelPtr &ge_root_model) {
+  auto instance = std::unique_ptr<HybridDavinciModel>(new (std::nothrow)HybridDavinciModel());
   if (instance != nullptr) {
     instance->impl_ = new (std::nothrow) HybridDavinciModel::Impl(ge_root_model);
     if (instance->impl_ != nullptr) {
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index f9564a8f..cd4c0a83 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -957,7 +957,7 @@ Status HybridModelBuilder::IndexTaskDefs() {
 
     // index task defs
     GELOGD("To index tasks for subgraph: %s", name.c_str());
-    unordered_map<int64_t, NodePtr> node_map;
+    std::unordered_map<int64_t, NodePtr> node_map;
     for (const auto &node : sub_graph->GetDirectNode()) {
       GE_CHECK_NOTNULL(node);
       GE_CHECK_NOTNULL(node->GetOpDesc());
diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.cc b/ge/hybrid/node_executor/controlop/control_op_executor.cc
index 83fc09ee..74920b22 100644
--- a/ge/hybrid/node_executor/controlop/control_op_executor.cc
+++ b/ge/hybrid/node_executor/controlop/control_op_executor.cc
@@ -405,7 +405,7 @@ Status ControlOpNodeExecutor::LoadTask(const HybridModel &model,
   auto node_item = model.GetNodeItem(node);
   GE_CHECK_NOTNULL(node_item);
 
-  unique_ptr<ControlOpNodeTask> node_task;
+  std::unique_ptr<ControlOpNodeTask> node_task;
   auto node_type = node->GetType();
   if (node_type == IF || node_type == STATELESSIF) {
     node_task.reset(new(std::nothrow) IfOpNodeTask());
diff --git a/inc/framework/common/taskdown_common.h b/inc/framework/common/taskdown_common.h
index b1364d16..12c6af89 100644
--- a/inc/framework/common/taskdown_common.h
+++ b/inc/framework/common/taskdown_common.h
@@ -19,8 +19,6 @@
 
 #include "runtime/rt.h"
 
-using namespace std;
-
 namespace ge {
 
 #define CC_FUSION_OP_MAX 32

From 2c24f922ffc2c8ad27ccebf38028ac1c4dd7957f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=88=E5=8A=A2?= <chenmai@huawei.com>
Date: Mon, 7 Dec 2020 15:30:22 +0800
Subject: [PATCH 3/9] cpplint cast fix

---
 ge/graph/load/new_model_manager/davinci_model.cc     |  2 +-
 .../task_info/super_kernel/super_kernel.cc           |  9 +++++----
 .../task_info/super_kernel/super_kernel_factory.cc   | 12 ++++++------
 ge/graph/load/new_model_manager/zero_copy_task.cc    |  2 +-
 ge/omm/csa_interact.cc                               |  2 +-
 ge/session/omg.cc                                    |  6 +++---
 6 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index 81d47b3b..1a4a5014 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -2801,7 +2801,7 @@ void *DavinciModel::Run(DavinciModel *model) {
                                       reinterpret_cast<int64_t *>(shape_data_buffer_data) +
                                       shape_data_buffer_length / sizeof(int64_t));
       GELOGD("Data: cur dynamic dims is %s", formats::JoinToString(model->cur_dynamic_dims_).c_str());
-      delete[] (int64_t *)current_data.blobs.back().data;
+      delete[] reinterpret_cast<int64_t *>(current_data.blobs.back().data);
       current_data.blobs.pop_back();
     }
     GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), model->SetProfileTime(MODEL_PRE_PROC_END));
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
index 63f29f84..e94fa425 100644
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
@@ -25,10 +25,11 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) {
   const void *args[] = {this->GetNavTablePtr(),
                         reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))};
 
-  rtError_t rt_ret = rtMalloc((void **)&(device_args_addr_), sizeof(args), RT_MEMORY_HBM);
-  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret); return
-                  RT_ERROR_TO_GE_STATUS(rt_ret);)
-  rt_ret = rtMemcpy((void *)device_args_addr_, sizeof(args), (void *)args, sizeof(args), RT_MEMCPY_HOST_TO_DEVICE);
+  rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM);
+  GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret);
+                  return RT_ERROR_TO_GE_STATUS(rt_ret);)
+  rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), reinterpret_cast<void *>(args),
+                    sizeof(args), RT_MEMCPY_HOST_TO_DEVICE);
   GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failied. error: 0x%X", rt_ret);
                   return RT_ERROR_TO_GE_STATUS(rt_ret);)
   rt_ret = rtKernelLaunchWithFlag((void *const)func_stub_, block_dim_, device_args_addr_, sizeof(args), NULL, stream,
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
index 69f7b159..39373901 100644
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel_factory.cc
@@ -87,7 +87,7 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
   }
   GELOGI("SKT: superkernel start fuse, superkernel size %zu.", stub_func_list.size());
   const size_t nav_table_len = 2 * stub_func_list.size();
-  std::unique_ptr<uint64_t[]> nav_table(new(std::nothrow) uint64_t[nav_table_len]);
+  std::unique_ptr<uint64_t[]> nav_table(new (std::nothrow) uint64_t[nav_table_len]);
   GE_CHECK_NOTNULL(nav_table);
   uint64_t nav_table_size = 2 * stub_func_list.size() * sizeof(int64_t);
 
@@ -106,16 +106,16 @@ Status SuperKernelFactory::FuseKernels(const std::vector<void *> &stub_func_list
     nav_table[i * 2 + 1] = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args_addr_list[i]));
     GELOGD("SKT: fuseKernels args base address %lu", nav_table[i * 2 + 1]);
   }
-  rt_ret = rtMalloc((void **)&hbm_nav_table_addr, nav_table_size, RT_MEMORY_HBM);
+  rt_ret = rtMalloc(reinterpret_cast<void **>(&hbm_nav_table_addr), nav_table_size, RT_MEMORY_HBM);
   GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failed. error: 0x%X", rt_ret);
                   return RT_ERROR_TO_GE_STATUS(rt_ret);)
-  rt_ret =
-    rtMemcpy((void *)hbm_nav_table_addr, nav_table_size, (void *)nav_table.get(), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
+  rt_ret = rtMemcpy(reinterpret_cast<void *>(hbm_nav_table_addr), nav_table_size,
+                    reinterpret_cast<void *>(nav_table.get()), nav_table_size, RT_MEMCPY_HOST_TO_DEVICE);
   GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMemcpy failed. error: 0x%X", rt_ret);
                   GE_CHK_RT(rtFree(hbm_nav_table_addr)); return RT_ERROR_TO_GE_STATUS(rt_ret);)
   // Create the necessary metadata for the super kernel
-  h = std::unique_ptr<skt::SuperKernel>(
-      new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim));
+  h =
+    std::unique_ptr<skt::SuperKernel>(new SuperKernel(this->func_stub_, hbm_nav_table_addr, nav_table_size, block_dim));
   return SUCCESS;
 }
 }  // namespace skt
diff --git a/ge/graph/load/new_model_manager/zero_copy_task.cc b/ge/graph/load/new_model_manager/zero_copy_task.cc
index 9b42d563..2609cb4b 100755
--- a/ge/graph/load/new_model_manager/zero_copy_task.cc
+++ b/ge/graph/load/new_model_manager/zero_copy_task.cc
@@ -131,7 +131,7 @@ Status ZeroCopyTask::UpdateTaskParam(uintptr_t addr, void *buffer_addr, const ma
       auto dst_addr = static_cast<uint8_t *>(buffer_addr);
       GELOGI("[ZCPY] %s update task, args_addr: %p, size: %zu, offset: %zu, virtual_addr: 0x%lx, user_data_addr: %p",
              name_.c_str(), args_addr_, args_size_, offset, addr, buffer_addr);
-      *(uintptr_t *)(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr);
+      *reinterpret_cast<uintptr_t *>(args_info + offset) = reinterpret_cast<uintptr_t>(dst_addr);
       is_updated_ = true;
     }
   }
diff --git a/ge/omm/csa_interact.cc b/ge/omm/csa_interact.cc
index 1599af94..1b33ddbd 100644
--- a/ge/omm/csa_interact.cc
+++ b/ge/omm/csa_interact.cc
@@ -202,7 +202,7 @@ Status CsaInteract::WriteFile(const std::string &file_name, const std::string &c
     }
   }
 
-  mmSsize_t ret = mmWrite(fd, (void *)content.c_str(), content.length());
+  mmSsize_t ret = mmWrite(fd, reinterpret_cast<void *>(const_cast<char *>(content.c_str())), content.length());
   if (ret == EN_ERROR) {
     GELOGE(INTERNAL_ERROR, "write file fail, errno is %d", errno);
     ret = mmClose(fd);
diff --git a/ge/session/omg.cc b/ge/session/omg.cc
index df837f99..b5e1e105 100755
--- a/ge/session/omg.cc
+++ b/ge/session/omg.cc
@@ -891,7 +891,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con
       if (status != ge::GRAPH_SUCCESS) {
         GELOGE(ge::FAILED, "Om file init failed.");
         if (model.model_data != nullptr) {
-          delete[](char *) model.model_data;
+          delete[] reinterpret_cast<char *>(model.model_data);
           model.model_data = nullptr;
         }
         return status;
@@ -902,7 +902,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con
       if (status != ge::GRAPH_SUCCESS) {
         GELOGE(ge::FAILED, "Get model part failed.");
         if (model.model_data != nullptr) {
-          delete[](char *) model.model_data;
+          delete[] reinterpret_cast<char *>(model.model_data);
           model.model_data = nullptr;
         }
         return status;
@@ -928,7 +928,7 @@ FMK_FUNC_HOST_VISIBILITY Status ConvertOmModelToJson(const char *model_file, con
     }
 
     if (model.model_data != nullptr) {
-      delete[](char *) model.model_data;
+      delete[] reinterpret_cast<char *>(model.model_data);
       model.model_data = nullptr;
     }
     return ret;

From 65b310205a075c72c238bd444a1396b97fbc8211 Mon Sep 17 00:00:00 2001
From: taoxudonghaha <justsheldon@163.com>
Date: Mon, 7 Dec 2020 15:33:28 +0800
Subject: [PATCH 4/9] clean bc warning and add atc.bin fwk_atc.bin

---
 ge/offline/CMakeLists.txt           | 126 +++-
 ge/offline/atc                      |  20 +
 ge/offline/module.mk                | 105 ++++
 ge/stub/gen_stubapi.py              |   7 +
 metadef                             |   2 +-
 parser                              |   2 +-
 tests/st/CMakeLists.txt             |  42 --
 tests/st/resnet50/common.cc         | 768 -------------------------
 tests/st/resnet50/common.h          | 102 ----
 tests/st/resnet50/ptest.h           | 225 --------
 tests/st/resnet50/resnet50_train.cc | 852 ----------------------------
 tests/st/test_ge_st.py              |  56 --
 12 files changed, 257 insertions(+), 2050 deletions(-)
 create mode 100644 ge/offline/atc
 delete mode 100644 tests/st/CMakeLists.txt
 delete mode 100644 tests/st/resnet50/common.cc
 delete mode 100644 tests/st/resnet50/common.h
 delete mode 100644 tests/st/resnet50/ptest.h
 delete mode 100644 tests/st/resnet50/resnet50_train.cc
 delete mode 100644 tests/st/test_ge_st.py

diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt
index 49af37c0..b3a0d53c 100644
--- a/ge/offline/CMakeLists.txt
+++ b/ge/offline/CMakeLists.txt
@@ -11,13 +11,13 @@ set(SRC_LIST
     "main.cc"
     "single_op_parser.cc"
     "../session/omg.cc"
-    "../ir_build/atc_ir_common.cc" 
+    "../ir_build/atc_ir_common.cc"
 )
 
 ############ atc ############
 add_executable(atc ${SRC_LIST} ${PROTO_HDRS})
 
-target_compile_options(atc PRIVATE 
+target_compile_options(atc PRIVATE
     -Werror
     -O2
     -Wno-deprecated-declarations
@@ -74,10 +74,130 @@ target_link_libraries(atc PRIVATE
     -ldl
 )
 
+############ atc.bin ############
+add_executable(atc.bin ${SRC_LIST} ${PROTO_HDRS})
+
+target_compile_options(atc.bin PRIVATE
+    -Werror
+    -O2
+    -Wno-deprecated-declarations
+)
+
+target_compile_definitions(atc.bin PRIVATE
+    PROTOBUF_INLINE_NOT_IN_HEADERS=0
+    COMPILE_OMG_PACKAGE
+    google=ascend_private
+)
+
+target_include_directories(atc.bin PRIVATE
+    ${CMAKE_CURRENT_LIST_DIR}
+    ${GE_CODE_DIR}
+    ${GE_CODE_DIR}/ge
+    ${GE_CODE_DIR}/inc/external
+    ${GE_CODE_DIR}/common/inc/external
+    ${GE_CODE_DIR}/common/inc/external/graph
+    ${GE_CODE_DIR}/inc
+    ${GE_CODE_DIR}/inc/framework
+    ${METADEF_DIR}/inc
+    ${METADEF_DIR}/inc/graph
+    ${METADEF_DIR}/inc/register
+    ${METADEF_DIR}/inc/external
+    ${METADEF_DIR}/inc/external/graph
+    ${METADEF_DIR}/inc/external/register
+    ${PARSER_DIR}
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_BINARY_DIR}/proto/ge
+    #### yellow zone ####
+    ${GE_CODE_DIR}/../inc
+    ${GE_CODE_DIR}/../inc/common
+    #### blue zone ####
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
+)
+
+target_link_libraries(atc.bin PRIVATE
+    $<BUILD_INTERFACE:intf_pub>
+    ascend_protobuf
+    ge_common
+    register
+    c_sec
+    graph
+    error_manager
+    ge_compiler
+    parser_common
+    gflags
+    json
+    runtime_compile
+    slog
+    static_mmpa
+    -lrt
+    -ldl
+)
+
+############ fwk_atc.bin ############
+add_executable(fwk_atc.bin ${SRC_LIST} ${PROTO_HDRS})
+
+target_compile_options(fwk_atc.bin PRIVATE
+    -Werror
+    -O2
+    -Wno-deprecated-declarations
+)
+
+target_compile_definitions(fwk_atc.bin PRIVATE
+    PROTOBUF_INLINE_NOT_IN_HEADERS=0
+    COMPILE_OMG_PACKAGE
+    google=ascend_private
+)
+
+target_include_directories(fwk_atc.bin PRIVATE
+    ${CMAKE_CURRENT_LIST_DIR}
+    ${GE_CODE_DIR}
+    ${GE_CODE_DIR}/ge
+    ${GE_CODE_DIR}/inc/external
+    ${GE_CODE_DIR}/common/inc/external
+    ${GE_CODE_DIR}/common/inc/external/graph
+    ${GE_CODE_DIR}/inc
+    ${GE_CODE_DIR}/inc/framework
+    ${METADEF_DIR}/inc
+    ${METADEF_DIR}/inc/graph
+    ${METADEF_DIR}/inc/register
+    ${METADEF_DIR}/inc/external
+    ${METADEF_DIR}/inc/external/graph
+    ${METADEF_DIR}/inc/external/register
+    ${PARSER_DIR}
+    ${CMAKE_BINARY_DIR}
+    ${CMAKE_BINARY_DIR}/proto/ge
+    #### yellow zone ####
+    ${GE_CODE_DIR}/../inc
+    ${GE_CODE_DIR}/../inc/common
+    #### blue zone ####
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
+)
+
+target_link_libraries(fwk_atc.bin PRIVATE
+    $<BUILD_INTERFACE:intf_pub>
+    ascend_protobuf
+    ge_common
+    register
+    c_sec
+    graph
+    error_manager
+    ge_compiler
+    parser_common
+    gflags
+    json
+    runtime_compile
+    slog
+    static_mmpa
+    -lrt
+    -ldl
+)
+
 ############ install ############
 set(INSTALL_BASE_DIR "")
 set(INSTALL_LIBRARY_DIR lib)
 
-install(TARGETS atc OPTIONAL
+install(TARGETS atc atc.bin fwk_atc.bin OPTIONAL
     LIBRARY DESTINATION ${INSTALL_LIBRARY_DIR}
 )
diff --git a/ge/offline/atc b/ge/offline/atc
new file mode 100644
index 00000000..a2b96482
--- /dev/null
+++ b/ge/offline/atc
@@ -0,0 +1,20 @@
+#!/bin/bash
+#-------------------------------------------------------------------
+# Purpose: export the package lib64/python paths, then run atc.bin (or fwk_atc.bin) with the caller's arguments.
+# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved.
+#-------------------------------------------------------------------
+
+LOCAL_PATH=$(cd "$(dirname "$0")"; pwd)
+PKG_PATH=$(cd "${LOCAL_PATH}/.."; pwd)
+LIB_P="/lib64"
+PYTHON_P="/python/site-packages"
+LIB64_PATH="${PKG_PATH}${LIB_P}"
+PYTHON_PATH="${PKG_PATH}${PYTHON_P}"
+export LD_LIBRARY_PATH="${LIB64_PATH}:${LD_LIBRARY_PATH}"
+export PYTHONPATH="${PYTHON_PATH}:${PYTHONPATH}"
+
+if [ -f "${PKG_PATH}/bin/atc.bin" ];then
+    "${PKG_PATH}/bin/atc.bin" "$@"
+else
+    fwk_atc.bin "$@"
+fi
diff --git a/ge/offline/module.mk b/ge/offline/module.mk
index 8859df29..c14be50f 100755
--- a/ge/offline/module.mk
+++ b/ge/offline/module.mk
@@ -54,3 +54,108 @@ LOCAL_LDFLAGS := -lrt -ldl
 
 include $(BUILD_HOST_EXECUTABLE)
 
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := atc.bin
+
+LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations
+LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private
+
+LOCAL_SRC_FILES := \
+    main.cc \
+    single_op_parser.cc \
+    ../session/omg.cc \
+    ../ir_build/atc_ir_common.cc \
+
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/../ ./ \
+    $(TOPDIR)inc \
+    $(TOPDIR)metadef/inc \
+    $(TOPDIR)graphengine/inc \
+    $(TOPDIR)inc/external \
+    $(TOPDIR)metadef/inc/external \
+    $(TOPDIR)graphengine/inc/external \
+    $(TOPDIR)metadef/inc/external/graph \
+    $(TOPDIR)graphengine/inc/framework \
+    $(TOPDIR)libc_sec/include \
+    $(TOPDIR)metadef/inc/common/util \
+    $(TOPDIR)parser    \
+    third_party/json/include \
+    third_party/gflags/include \
+    third_party/protobuf/include \
+    proto/om.proto \
+    proto/ge_ir.proto \
+    proto/task.proto \
+    proto/insert_op.proto \
+
+LOCAL_SHARED_LIBRARIES := \
+    libc_sec \
+    libge_common \
+    libascend_protobuf \
+    libslog \
+    libgraph \
+    libregister \
+    liberror_manager \
+    libge_compiler \
+    libruntime_compile \
+    libparser_common \
+    liberror_manager \
+
+LOCAL_STATIC_LIBRARIES := libgflags
+
+LOCAL_LDFLAGS := -lrt -ldl
+
+include $(BUILD_HOST_EXECUTABLE)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := fwk_atc.bin
+
+LOCAL_CFLAGS += -Werror -Wno-deprecated-declarations
+LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DCOMPILE_OMG_PACKAGE -O2 -Dgoogle=ascend_private
+
+LOCAL_SRC_FILES := \
+    main.cc \
+    single_op_parser.cc \
+    ../session/omg.cc \
+    ../ir_build/atc_ir_common.cc \
+
+LOCAL_C_INCLUDES := \
+    $(LOCAL_PATH)/../ ./ \
+    $(TOPDIR)inc \
+    $(TOPDIR)metadef/inc \
+    $(TOPDIR)graphengine/inc \
+    $(TOPDIR)inc/external \
+    $(TOPDIR)metadef/inc/external \
+    $(TOPDIR)graphengine/inc/external \
+    $(TOPDIR)metadef/inc/external/graph \
+    $(TOPDIR)graphengine/inc/framework \
+    $(TOPDIR)libc_sec/include \
+    $(TOPDIR)metadef/inc/common/util \
+    $(TOPDIR)parser    \
+    third_party/json/include \
+    third_party/gflags/include \
+    third_party/protobuf/include \
+    proto/om.proto \
+    proto/ge_ir.proto \
+    proto/task.proto \
+    proto/insert_op.proto \
+
+LOCAL_SHARED_LIBRARIES := \
+    libc_sec \
+    libge_common \
+    libascend_protobuf \
+    libslog \
+    libgraph \
+    libregister \
+    liberror_manager \
+    libge_compiler \
+    libruntime_compile \
+    libparser_common \
+    liberror_manager \
+
+LOCAL_STATIC_LIBRARIES := libgflags
+
+LOCAL_LDFLAGS := -lrt -ldl
+
+include $(BUILD_HOST_EXECUTABLE)
diff --git a/ge/stub/gen_stubapi.py b/ge/stub/gen_stubapi.py
index f2a6a287..d19b44a6 100644
--- a/ge/stub/gen_stubapi.py
+++ b/ge/stub/gen_stubapi.py
@@ -1,3 +1,10 @@
+#!/usr/bin/python3.7
+# -*- coding: UTF-8 -*-
+#-------------------------------------------------------------------
+# Purpose:
+# Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved.
+#-------------------------------------------------------------------
+
 import os
 import re
 import sys
diff --git a/metadef b/metadef
index 29c31bb8..5b9a7f84 160000
--- a/metadef
+++ b/metadef
@@ -1 +1 @@
-Subproject commit 29c31bb87d8bbe6904ab6fa72034a803fb50a746
+Subproject commit 5b9a7f84a4347f8816d492aa51f2414ccf8a0744
diff --git a/parser b/parser
index ba956d34..70369668 160000
--- a/parser
+++ b/parser
@@ -1 +1 @@
-Subproject commit ba956d349d8ad3e864d27467f4f0119333cbadc6
+Subproject commit 70369668abebed84942d9f355494a89e82cc1eac
diff --git a/tests/st/CMakeLists.txt b/tests/st/CMakeLists.txt
deleted file mode 100644
index 56babec1..00000000
--- a/tests/st/CMakeLists.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2019-2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-cmake_minimum_required(VERSION 3.0)
-set(CMAKE_CXX_STANDARD 11)
-project(ge_st CXX C)
-
-set(CMAKE_CXX_FLAGS "-O1 -fPIC -Wl,-unresolved-symbols=ignore-in-shared-libs")
-
-
-file(GLOB_RECURSE RES50_TRAIN_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
-        "resnet50/resnet50_train.cc"
-        "resnet50/common.cc"
-)
-
-include_directories(${GE_SOURCE_DIR}/inc)
-include_directories(${GE_SOURCE_DIR}/inc/graph)
-include_directories(${GE_SOURCE_DIR}/inc/framework)
-include_directories(${GE_SOURCE_DIR}/inc/external)
-include_directories(${GE_SOURCE_DIR}/inc/external/ge)
-include_directories(${GE_SOURCE_DIR}/inc/external/graph)
-include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc)
-include_directories(${GE_SOURCE_DIR}/third_party/fwkacllib/inc/ops)
-include_directories(/usr/local/HiAI/opp/op_proto/built-in/inc)
-
-add_executable(st_resnet50_train ${RES50_TRAIN_SRCS})
-target_link_libraries(st_resnet50_train
-        ${PROTOBUF_LIBRARY}
-        ge_client_train ge_memory
-)
\ No newline at end of file
diff --git a/tests/st/resnet50/common.cc b/tests/st/resnet50/common.cc
deleted file mode 100644
index 674ef926..00000000
--- a/tests/st/resnet50/common.cc
+++ /dev/null
@@ -1,768 +0,0 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <math.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <iostream>
-#include <vector>
-
-#include "common.h"
-#include "model.h"
-
-#define MAX_HEAD_SIZE 50
-
-using namespace std;
-using namespace ge;
-
-void update_op_format(Operator ops, Format format) {
-  printf("set format begin.........\n");
-  ge::TensorDesc tensor_desc_x = ops.GetInputDesc("x");
-  ge::TensorDesc tensor_desc_y = ops.GetOutputDesc("y");
-  Format f_x0 = tensor_desc_x.GetFormat();
-  Format f_y0 = tensor_desc_x.GetFormat();
-  printf("before set  x format:%d \n", f_x0);
-  printf("before set  y format:%d \n", f_y0);
-  printf("format to be set is :%d \n", format);
-  tensor_desc_x.SetFormat(format);
-  tensor_desc_y.SetFormat(format);
-  ops.UpdateInputDesc("x", tensor_desc_x);
-  ops.UpdateOutputDesc("y", tensor_desc_y);
-  Format f_x = tensor_desc_x.GetFormat();
-  Format f_y = tensor_desc_y.GetFormat();
-  printf("after set  x format:%d \n", f_x);
-  printf("after set  y format:%d \n", f_y);
-}
-
-/// getDimInfo: get dim info from data file
-/// param:
-/// fp: the testing datafile object
-///
-/// return :
-/// dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3,162(3*3*6*3)],4 is dim size,3,3,6,3 is the
-/// dim shape data_size: the size of the testing data including the data file
-void getDimInfo(FILE *fp, std::vector<uint64_t> &dim_info) {
-  // get dim info from hisi testing data file
-  uint32_t *dim_buffer = (uint32_t *)malloc(MAX_HEAD_SIZE * sizeof(uint32_t));
-  fread(dim_buffer, sizeof(uint32_t), MAX_HEAD_SIZE, fp);
-  dim_info.push_back(*dim_buffer);  // get dim size
-
-  // get data shape to compute the datasize
-  uint64_t data_size = 1;
-  uint32_t i = 1;
-  for (; i <= dim_info[0]; i++) {
-    dim_info.push_back(*(dim_buffer + i));
-    data_size *= *(dim_buffer + i);
-  }
-  dim_info.push_back(data_size);
-
-  free(dim_buffer);
-}
-
-/// readTestDataFile: read test date from hisi .t datafile
-/// param:
-///  infile: the path of hisi .t datafile
-/// return:
-///  dim_info: array to store the info of the dim in datafile, like [4,3,3,6,3],4 is dim size,3,3,6,3 is the dim shape
-void *readTestDataFile(std::string infile, std::vector<uint64_t> &dim_info) {
-  FILE *fp;
-  fp = fopen(infile.c_str(), "r");
-
-  if (fp == NULL) {
-    printf("ERROR: cant't open file %s\n", infile.c_str());
-    return NULL;
-  } else {
-    getDimInfo(fp, dim_info);
-    uint64_t data_size = dim_info[dim_info.size() - 1];
-
-    fclose(fp);
-
-    fp = fopen(infile.c_str(), "r");
-    if (fp == NULL) {
-      printf("ERROR: cant't open file %s\n", infile.c_str());
-      return NULL;
-    }
-    uint32_t *memory = (uint32_t *)malloc((dim_info[0] + 1 + data_size) * sizeof(uint32_t));
-    fread(memory, sizeof(uint32_t), (dim_info[0] + 1 + data_size), fp);
-    fclose(fp);
-    return memory + (dim_info[0] + 1);
-  }
-}
-
-void *readUint8TestDataFile(std::string infile, int size) {
-  FILE *fp;
-  fp = fopen(infile.c_str(), "r");
-
-  if (fp == NULL) {
-    printf("ERROR: cant't open file %s\n", infile.c_str());
-    return NULL;
-  }
-  uint8_t *memory = (uint8_t *)malloc((size) * sizeof(uint8_t));
-  fread(memory, sizeof(uint8_t), (size), fp);
-  fclose(fp);
-  return memory;
-}
-
-/// allclose
-/// param:
-///  a:compared file a
-///  b:compared file b
-///  count: the count size which will compare
-///  rtol:
-///  atol:
-/// return:
-///  true or false
-bool allclose(float *a, float *b, uint64_t count, float rtol = 1e-05, float atol = 1e-08) {
-  uint32_t i = 0;
-
-  for (; i < count; ++i) {
-    if (fabs(a[i] - b[i]) > (atol + rtol * fabs(b[i]))) {
-      printf("compara failed: i= %d, a[i]=%f, b[i]=%f,atol=%f,rtol=%f\n", i, a[i], b[i], atol, rtol);
-      return false;
-    }
-  }
-
-  return true;
-}
-
-/// compFp32WithTData: compare the data with the data in hisi .t file
-/// param:
-///  actual_output_data: the result of ge
-///  expected_data_file: the path of hisi .t result file
-///  rtol:
-///  atol:
-/// return:
-///  true of false
-bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol = 1e-05, float atol = 1e-08) {
-  std::vector<uint64_t> dim_info;
-  float *expected_output_data = (float *)readTestDataFile(expected_data_file, dim_info);
-
-  uint32_t i = 1;
-  uint64_t data_size = 1;
-  for (; i <= dim_info[0]; i++) {
-    data_size *= dim_info[i];
-  }
-  return allclose(actual_output_data, expected_output_data, data_size, rtol, atol);
-}
-
-int SwitchDatatype(DataType dt) {
-  int size = 1;
-  if (dt == ge::DT_FLOAT) size = 4;
-  if (dt == ge::DT_INT32) size = 4;
-  if (dt == ge::DT_FLOAT16) size = 2;
-  if (dt == ge::DT_INT64) size = 8;
-  return size;
-}
-
-ge::Tensor genTensor(std::vector<int64_t> tensor_shape, Format format, DataType dt) {
-  int size = 1;
-  for (int i = 0; i < tensor_shape.size(); i++) {
-    size = size * tensor_shape[i];
-  }
-
-  int data_type_size = SwitchDatatype(dt);
-
-  size = abs(size * data_type_size);
-  vector<uint8_t> data_value;
-
-  if (size == 0) {
-    TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt);
-    input_tensor_desc.SetRealDimCnt(tensor_shape.size());
-    Tensor gen_tensor = Tensor(input_tensor_desc, data_value);
-    return gen_tensor;
-  }
-  for (int i = 0; i < size; i++) {
-    data_value.push_back(1);
-  }
-  TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), format, dt);
-  input_tensor_desc.SetRealDimCnt(tensor_shape.size());
-  Tensor gen_tensor = Tensor(input_tensor_desc, data_value);
-  return gen_tensor;
-}
-
-ge::Tensor genTensor_withVaule(std::vector<int64_t> tensor_shape, float value) {
-  int size = 1;
-  for (int i = 0; i < tensor_shape.size(); i++) {
-    size = size * tensor_shape[i];
-  }
-
-  float *data_value = new float[size];
-  for (int i = 0; i < size; i++) {
-    *(data_value + i) = value;
-  }
-  Tensor gen_ge_tensor;
-  TensorDesc input_tensor_desc = TensorDesc(ge::Shape(tensor_shape), FORMAT_NCHW);
-  gen_ge_tensor.SetTensorDesc(input_tensor_desc);
-  gen_ge_tensor.SetData((uint8_t *)data_value, size * 4);
-
-  return gen_ge_tensor;
-}
-
-Tensor genTesnor_Shape_as_data(std::vector<int64_t> tensor_shape) {
-  Format format = FORMAT_NCHW;
-  DataType dt = DT_INT32;
-  int size = tensor_shape.size();
-  int32_t *tensor_data = new int32_t[size];
-  std::cout << "shape tensor size:" << size << endl;
-  for (int i = 0; i < size; i++) {
-    *(tensor_data + i) = tensor_shape[i];
-  }
-
-  Tensor gen_tensor;
-  TensorDesc input_tensor_desc = TensorDesc(ge::Shape({size}), FORMAT_NCHW, DT_INT32);
-  gen_tensor.SetData((uint8_t *)tensor_data, size * GetDatTypeSize(dt));
-  gen_tensor.SetTensorDesc(input_tensor_desc);
-
-  return gen_tensor;
-}
-
-/// train_flag is 0 when infer; train_flag is 1 when train; train_flag is 0 default
-/// run_mode_path is not 0,1,2 when TBE; run_mode_path is 1 when FE; run_mode_path is 0 default
-/// run_mode_path is 2 now when AICPU, ge.enabledlocalFmkop is 1
-ge::Status GEInitialize_api(string train_flag, string run_mode_path) {
-  ge::Status ret;
-  if (run_mode_path == "0") {
-    const std::map<string, string> config = {
-        {"device_id", "0,2,4,6"},
-        {"rank_table_file", "hccl from csa/paas"},
-        {"ge.graphRunMode", train_flag},
-        {"ge.aicpuFlag", "1"},
-        {"ge.feFlag", "1"},
-        {DDK_VERSION_FLAG, "1.60.T17.B830"},
-        {"ge.soLoadPath",
-         "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/"
-         "libaicpu_plugin.so"}};
-    ret = ge::GEInitialize(config);
-  } else if (run_mode_path == "1") {
-    const std::map<string, string> config = {
-        {"device_id", "0,2,4,6"},
-        {"rank_table_file", "hccl from csa/paas"},
-        {"ge.graphRunMode", train_flag},
-        {"ge.feFlag", "1"},
-        {DDK_VERSION_FLAG, "1.60.T17.B830"},
-        {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/bert"},
-        {"ge.soLoadPath", "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so"}};
-    ret = ge::GEInitialize(config);
-  } else if (run_mode_path == "2") {
-    const std::map<string, string> config = {{"device_id", "0,2,4,6"},
-                                             {"rank_table_file", "hccl from csa/paas"},
-                                             {"ge.graphRunMode", train_flag},
-                                             {LOCAL_FMKOP_FLAG, "1"}};
-    ret = ge::GEInitialize(config);
-  } else {
-    const std::map<string, string> config = {
-        {"device_id", "0,2,4,6"},
-        {"rank_table_file", "hccl from csa/paas"},
-        {"ge.graphRunMode", train_flag},
-        {DDK_VERSION_FLAG, "1.60.T17.B830"},
-        {TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + run_mode_path}};
-    ret = ge::GEInitialize(config);
-  }
-  std::cout << "GEInitialize_ret is " << ret << std::endl;
-
-  return ret;
-}
-
-/// train_flag is infer default
-/// run_mode: is multi group of [fe,aicpu,bert,deeplabv3,mobilenetv2,single_path_nas,ssd]
-/// but bert,deeplabv3,mobilenetv2,single_path_nas,ssd can only set one value from array
-/// eg:"fe,aicpu,bert" or "fe", default is “fe”
-/// "fe,aicpu,bert" remain open fe aicpu and bert
-ge::Status GEInitialize_api_new(string train_flag, string run_mode) {
-  ge::Status ret;
-  vector<string> modes;
-
-  char *strs = new char[run_mode.length() + 1];
-  strcpy(strs, run_mode.c_str());
-  const char *delim = ",";
-  char *p = strtok(strs, delim);
-  while (p) {
-    string s = p;        // transform substr to string
-    modes.push_back(s);  // save to result array
-    p = strtok(NULL, delim);
-  }
-
-  std::map<string, string> config = {
-      {"device_id", "0,2,4,6"},
-      {"rank_table_file", "hccl from csa/paas"},
-      {DDK_VERSION_FLAG, "1.60.T17.B830"},
-      {"ge.opsProtoLibPath", "/usr/local/HiAI/runtime/ops/op_proto/built-in/libopsproto.so"}};
-  if (train_flag == "infer")
-    config.insert(pair<string, string>("ge.graphRunMode", "0"));
-  else if (train_flag == "train")
-    config.insert(pair<string, string>("ge.graphRunMode", "1"));
-  else
-    std::cout << "GeInitialize give the error param" << std::endl;
-
-  for (int i = 0; i < modes.size(); i++) {
-    if (modes[i] == "fe") {
-      config.insert(pair<string, string>("ge.feFlag", "1"));
-      if (config.find("ge.soLoadPath") != config.end()) {
-        config["ge.soLoadPath"] =
-            "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/"
-            "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/"
-            "runtime/lib64/plugin/opskernel/librts_engine.so";
-      } else {
-        config.insert(pair<string, string>(
-            "ge.soLoadPath",
-            "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/"
-            "libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so"));
-      }
-    } else if (modes[i] == "aicpu") {
-      config.insert(pair<string, string>("ge.aicpuFlag", "1"));
-      if (config.find("ge.soLoadPath") != config.end()) {
-        config["ge.soLoadPath"] =
-            "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libfe.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/"
-            "libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/libge_local_engine.so:/usr/local/HiAI/"
-            "runtime/lib64/plugin/opskernel/librts_engine.so";
-      } else {
-        config.insert(pair<string, string>(
-            "ge.soLoadPath",
-            "/usr/local/HiAI/runtime/lib64/plugin/opskernel/libaicpu_plugin.so:/usr/local/HiAI/runtime/lib64/plugin/"
-            "opskernel/libge_local_engine.so:/usr/local/HiAI/runtime/lib64/plugin/opskernel/librts_engine.so"));
-      }
-    } else if (modes[i] == "bert" || modes[i] == "deeplabv3" || modes[i] == "mobilenetv2" ||
-               modes[i] == "single_path_nas" || modes[i] == "ssd") {
-      config.insert(pair<string, string>(TBE_PLUGIN_PATH_FLAG, "/usr/local/HiAI/runtime/lib64/tbe_plugin/" + modes[i]));
-    } else if (modes[i] == "plugin") {
-
-    } else
-      std::cout << "GeInitialize give the error param" << std::endl;
-  }
-  ret = ge::GEInitialize(config);
-
-  std::cout << "GEInitialize_ret is " << ret << std::endl;
-
-  return ret;
-}
-
-ge::Status GEFinalize_api() {
-  ge::Status ret = ge::GEFinalize();
-  std::cout << "GEFinalize ret is " << ret << std::endl;
-
-  return ret;
-}
-
-/// set train_flag
-/// if run_mode_path is "fe" remain FE process; "fe,plugin" is FE and TBE plugin process
-/// "aicpu" is open aicpu plugin
-int RunGraph_initData(Graph &graph, string op_name, map<string, std::vector<int64_t>> attr_test, string train_flag,
-                      string run_mode_path) {
-  std::map<string, string> options = {{RUN_FLAG, "1"}};
-  uint32_t graph_id = 0;
-
-  ge::Status ret = GEInitialize_api_new(train_flag, run_mode_path);
-  EXPECT_EQ(ret, ge::SUCCESS);
-
-  ge::Session *session = new Session(options);
-  ASSERT_TRUE(session != NULL);
-
-  std::vector<Tensor> input;
-  if (attr_test.find("input1") != attr_test.end()) {
-    Tensor input_tensor = genTensor(attr_test["input1"]);
-    input.push_back(input_tensor);
-  }
-  if (attr_test.find("input2") != attr_test.end()) {
-    Tensor input_tensor = genTensor(attr_test["input2"]);
-    input.push_back(input_tensor);
-  }
-  if (attr_test.find("input3") != attr_test.end()) {
-    Tensor input_tensor = genTensor(attr_test["input3"]);
-    input.push_back(input_tensor);
-  }
-  std::vector<Tensor> output;
-
-  ret = session->AddGraph(graph_id, graph);
-  EXPECT_EQ(ret, ge::SUCCESS);
-  if (train_flag == "1") {
-    setenv("GE_TRAIN", "1", true);
-    ret = session->RunGraph(graph_id, input, output);
-    setenv("GE_TRAIN", "0", true);
-  } else {
-    ret = session->RunGraph(graph_id, input, output);
-  }
-  delete session;
-  GEFinalize_api();
-
-  if (ret != ge::SUCCESS) {
-    std::cout << " run graph failed" << std::endl;
-    return -1;
-  } else {
-    return 0;
-  }
-}
-
-ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graph_id, Graph &graph, std::vector<Tensor> inputs,
-                                     std::vector<Tensor> &outputs) {
-  ge::Status ret = session->AddGraph(graph_id, graph);
-  EXPECT_EQ(ret, ge::SUCCESS);
-  ret = session->RunGraph(graph_id, inputs, outputs);
-
-  return ret;
-}
-
-ge::Session *create_session() {
-  // Init session
-  std::map<string, string> options = {{"a", "b"}, {TRAIN_FLAG, "1"}};
-  ge::Session *session = new Session(options);
-  ASSERT_TRUE(session != NULL);
-
-  return session;
-}
-
-ge::Session *create_aipp_session() {
-  // Init session
-  std::map<string, string> options = {{"a", "b"}, {TRAIN_FLAG, "1"}, {"ge.insertOpFile", "/root/host/ge/aipp.cfg"}};
-  ge::Session *session = new Session(options);
-  ASSERT_TRUE(session != NULL);
-
-  return session;
-}
-
-int buildCheckPointGraph(Graph &graph, map<string, TensorDesc> variables) {
-  std::vector<Operator> inputs{};
-  std::vector<Operator> outputs{};
-
-  for (map<string, TensorDesc>::iterator it = variables.begin(); it != variables.end(); ++it) {
-    auto var = op::Variable(string(it->first));
-    var.update_output_desc_y(it->second);
-    inputs.push_back(var);
-    graph.AddOp(var);
-  }
-
-  auto save = op::Save().create_dynamic_input_tensors(inputs.size());
-  for (int i = 0; i < inputs.size(); i++) {
-    save.set_dynamic_input_tensors(i, inputs[i]);
-  }
-
-  graph.SetInputs(inputs).SetOutputs(outputs);
-  return 0;
-}
-
-int buildInitGraph(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var,
-                   std::vector<float> values_var) {
-  std::vector<Operator> inputs{};
-  std::vector<Operator> outputs{};
-
-  for (int i = 0; i < desc_var.size(); i++) {
-    desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum());
-    auto tensor_data = genTensor_withVaule(desc_var[i].GetShape().GetDims(), values_var[i]);
-    auto var_constant = op::Constant().set_attr_value(tensor_data);
-    var_constant.update_output_desc_y(desc_var[i]);
-
-    auto var_init = op::Variable(string(name_var[i]));
-    var_init.update_output_desc_y(desc_var[i]);
-    auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant);
-    inputs.push_back(var_init);
-  }
-  graph.SetInputs(inputs).SetOutputs(outputs);
-  return 0;
-}
-
-int buildInitGraph_other_dataType(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var) {
-  std::vector<Operator> inputs{};
-  std::vector<Operator> outputs{};
-
-  for (int i = 0; i < desc_var.size(); i++) {
-    desc_var[i].SetRealDimCnt(desc_var[i].GetShape().GetDimNum());
-    auto tensor_data = genTensor(desc_var[i].GetShape().GetDims(), desc_var[i].GetFormat(), desc_var[i].GetDataType());
-    auto var_constant = op::Constant().set_attr_value(tensor_data);
-    var_constant.update_output_desc_y(desc_var[i]);
-
-    auto var_init = op::Variable(string(name_var[i]));
-    var_init.update_output_desc_y(desc_var[i]);
-    auto var_assign = op::Assign().set_input_ref(var_init).set_input_value(var_constant);
-    inputs.push_back(var_init);
-
-    graph.AddOp(var_constant);
-    graph.AddOp(var_init);
-    graph.AddOp(var_assign);
-  }
-  graph.SetInputs(inputs).SetOutputs(outputs);
-  return 0;
-}
-
-bool build_multi_input_multi_output_graph(Graph &graph) {
-  auto data1 = op::Data("Data1").set_attr_index(0);
-  auto data2 = op::Data("Data2").set_attr_index(1);
-
-  vector<uint64_t> dim_info;
-
-  auto relu1 = op::Relu("Relu1").set_input_x(data1);
-  auto relu2 = op::Relu("Relu2").set_input_x(data2);
-
-  auto eltwise = op::Eltwise("Eltwise")
-                     .create_dynamic_input_x(2)
-                     .set_dynamic_input_x(0, relu1)
-                     .set_dynamic_input_x(1, relu2)
-                     .set_attr_N(2)
-                     .set_attr_mode(1)
-                     .set_attr_coeff({1, 1});
-
-  auto eltwise1 = op::Eltwise("Eltwise1")
-                      .create_dynamic_input_x(2)
-                      .set_dynamic_input_x(0, eltwise)
-                      .set_dynamic_input_x(1, eltwise)
-                      .set_attr_N(2)
-                      .set_attr_mode(1)
-                      .set_attr_coeff({1, 1});
-
-  auto eltwise2 = op::Eltwise("Eltwise2")
-                      .create_dynamic_input_x(2)
-                      .set_dynamic_input_x(0, eltwise)
-                      .set_dynamic_input_x(1, eltwise)
-                      .set_attr_N(2)
-                      .set_attr_mode(1)
-                      .set_attr_coeff({1, 1});
-
-  std::vector<Operator> inputs{data1, data2};
-  std::vector<Operator> outputs{eltwise1, eltwise2};
-  graph.SetInputs(inputs).SetOutputs(outputs);
-  return true;
-}
-
-void build_big_graph(Graph &graph, map<string, std::vector<int64_t>> attr) {
-  auto data = op::Data("Data").set_attr_index(0);
-  auto weight = op::Const("weight1").set_attr_value(genTensor(attr["weight"]));
-  vector<int64_t> weight_shape(attr["weight"].begin(), attr["weight"].end());
-  TensorDesc weight_desc(ge::Shape(weight_shape), FORMAT_NCHW, DT_FLOAT);
-  weight.update_output_desc_y(weight_desc);
-  auto conv_1 = op::Conv2D("conv1").set_input_x(data).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-
-  auto conv_2 = op::Conv2D("conv2").set_input_x(conv_1).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_3 = op::Conv2D("conv3").set_input_x(conv_2).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_4 = op::Conv2D("conv4").set_input_x(conv_3).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_5 = op::Conv2D("conv5").set_input_x(conv_4).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_6 = op::Conv2D("conv6").set_input_x(conv_5).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_7 = op::Conv2D("conv7").set_input_x(conv_6).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_8 = op::Conv2D("conv8").set_input_x(conv_7).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_9 = op::Conv2D("conv9").set_input_x(conv_8).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_10 = op::Conv2D("conv10").set_input_x(conv_9).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_11 = op::Conv2D("conv11").set_input_x(conv_10).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_12 = op::Conv2D("conv12").set_input_x(conv_11).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_13 = op::Conv2D("conv13").set_input_x(conv_12).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_14 = op::Conv2D("conv14").set_input_x(conv_13).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_15 = op::Conv2D("conv15").set_input_x(conv_14).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_16 = op::Conv2D("conv16").set_input_x(conv_15).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_17 = op::Conv2D("conv17").set_input_x(conv_16).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_18 = op::Conv2D("conv18").set_input_x(conv_17).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_19 = op::Conv2D("conv19").set_input_x(conv_18).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_20 = op::Conv2D("conv20").set_input_x(conv_19).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_21 = op::Conv2D("conv21").set_input_x(conv_20).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_22 = op::Conv2D("conv22").set_input_x(conv_21).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_23 = op::Conv2D("conv23").set_input_x(conv_22).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_24 = op::Conv2D("conv24").set_input_x(conv_23).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_25 = op::Conv2D("conv25").set_input_x(conv_24).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_26 = op::Conv2D("conv26").set_input_x(conv_25).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_27 = op::Conv2D("conv27").set_input_x(conv_26).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_28 = op::Conv2D("conv28").set_input_x(conv_27).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_29 = op::Conv2D("conv29").set_input_x(conv_28).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_30 = op::Conv2D("conv30").set_input_x(conv_29).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_31 = op::Conv2D("conv31").set_input_x(conv_30).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_32 = op::Conv2D("conv32").set_input_x(conv_31).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_33 = op::Conv2D("conv33").set_input_x(conv_32).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_34 = op::Conv2D("conv34").set_input_x(conv_33).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_35 = op::Conv2D("conv35").set_input_x(conv_34).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_36 = op::Conv2D("conv36").set_input_x(conv_35).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_37 = op::Conv2D("conv37").set_input_x(conv_36).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_38 = op::Conv2D("conv38").set_input_x(conv_37).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_39 = op::Conv2D("conv39").set_input_x(conv_38).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_40 = op::Conv2D("conv40").set_input_x(conv_39).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_41 = op::Conv2D("conv41").set_input_x(conv_40).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_42 = op::Conv2D("conv42").set_input_x(conv_41).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_43 = op::Conv2D("conv43").set_input_x(conv_42).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_44 = op::Conv2D("conv44").set_input_x(conv_43).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_45 = op::Conv2D("conv45").set_input_x(conv_44).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_46 = op::Conv2D("conv46").set_input_x(conv_45).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_47 = op::Conv2D("conv47").set_input_x(conv_46).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_48 = op::Conv2D("conv48").set_input_x(conv_47).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_49 = op::Conv2D("conv49").set_input_x(conv_48).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_50 = op::Conv2D("conv50").set_input_x(conv_49).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_51 = op::Conv2D("conv51").set_input_x(conv_50).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_52 = op::Conv2D("conv52").set_input_x(conv_51).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_53 = op::Conv2D("conv53").set_input_x(conv_52).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_54 = op::Conv2D("conv54").set_input_x(conv_53).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_55 = op::Conv2D("conv55").set_input_x(conv_54).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_56 = op::Conv2D("conv56").set_input_x(conv_55).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_57 = op::Conv2D("conv57").set_input_x(conv_56).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_58 = op::Conv2D("conv58").set_input_x(conv_57).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_59 = op::Conv2D("conv59").set_input_x(conv_58).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_60 = op::Conv2D("conv60").set_input_x(conv_59).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_61 = op::Conv2D("conv61").set_input_x(conv_60).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_62 = op::Conv2D("conv62").set_input_x(conv_61).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_63 = op::Conv2D("conv63").set_input_x(conv_62).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_64 = op::Conv2D("conv64").set_input_x(conv_63).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_65 = op::Conv2D("conv65").set_input_x(conv_64).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_66 = op::Conv2D("conv66").set_input_x(conv_65).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_67 = op::Conv2D("conv67").set_input_x(conv_66).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_68 = op::Conv2D("conv68").set_input_x(conv_67).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_69 = op::Conv2D("conv69").set_input_x(conv_68).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_70 = op::Conv2D("conv70").set_input_x(conv_69).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_71 = op::Conv2D("conv71").set_input_x(conv_70).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_72 = op::Conv2D("conv72").set_input_x(conv_71).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_73 = op::Conv2D("conv73").set_input_x(conv_72).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_74 = op::Conv2D("conv74").set_input_x(conv_73).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_75 = op::Conv2D("conv75").set_input_x(conv_74).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_76 = op::Conv2D("conv76").set_input_x(conv_75).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_77 = op::Conv2D("conv77").set_input_x(conv_76).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_78 = op::Conv2D("conv78").set_input_x(conv_77).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_79 = op::Conv2D("conv79").set_input_x(conv_78).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_80 = op::Conv2D("conv80").set_input_x(conv_79).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_81 = op::Conv2D("conv81").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_82 = op::Conv2D("conv82").set_input_x(conv_81).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_83 = op::Conv2D("conv83").set_input_x(conv_82).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_84 = op::Conv2D("conv84").set_input_x(conv_83).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_85 = op::Conv2D("conv85").set_input_x(conv_84).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_86 = op::Conv2D("conv86").set_input_x(conv_85).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_87 = op::Conv2D("conv87").set_input_x(conv_86).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_88 = op::Conv2D("conv88").set_input_x(conv_87).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_89 = op::Conv2D("conv89").set_input_x(conv_88).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_90 = op::Conv2D("conv90").set_input_x(conv_89).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_91 = op::Conv2D("conv91").set_input_x(conv_80).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_92 = op::Conv2D("conv92").set_input_x(conv_91).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_93 = op::Conv2D("conv93").set_input_x(conv_92).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_94 = op::Conv2D("conv94").set_input_x(conv_93).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_95 = op::Conv2D("conv95").set_input_x(conv_94).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_96 = op::Conv2D("conv96").set_input_x(conv_95).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_97 = op::Conv2D("conv97").set_input_x(conv_96).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_98 = op::Conv2D("conv98").set_input_x(conv_97).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_99 = op::Conv2D("conv99").set_input_x(conv_98).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_100 = op::Conv2D("conv100").set_input_x(conv_99).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_101 = op::Conv2D("conv101").set_input_x(conv_100).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_102 = op::Conv2D("conv102").set_input_x(conv_101).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_103 = op::Conv2D("conv103").set_input_x(conv_102).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_104 = op::Conv2D("conv104").set_input_x(conv_103).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_105 = op::Conv2D("conv105").set_input_x(conv_104).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_106 = op::Conv2D("conv106").set_input_x(conv_105).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_107 = op::Conv2D("conv107").set_input_x(conv_106).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_108 = op::Conv2D("conv108").set_input_x(conv_107).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_109 = op::Conv2D("conv109").set_input_x(conv_108).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_110 = op::Conv2D("conv110").set_input_x(conv_109).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_111 = op::Conv2D("conv111").set_input_x(conv_110).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_112 = op::Conv2D("conv112").set_input_x(conv_111).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_113 = op::Conv2D("conv113").set_input_x(conv_112).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_114 = op::Conv2D("conv114").set_input_x(conv_113).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_115 = op::Conv2D("conv115").set_input_x(conv_114).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_116 = op::Conv2D("conv116").set_input_x(conv_115).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_117 = op::Conv2D("conv117").set_input_x(conv_116).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_118 = op::Conv2D("conv118").set_input_x(conv_117).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_119 = op::Conv2D("conv119").set_input_x(conv_118).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_120 = op::Conv2D("conv120").set_input_x(conv_119).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_121 = op::Conv2D("conv121").set_input_x(conv_120).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_122 = op::Conv2D("conv122").set_input_x(conv_121).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_123 = op::Conv2D("conv123").set_input_x(conv_122).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_124 = op::Conv2D("conv124").set_input_x(conv_123).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_125 = op::Conv2D("conv125").set_input_x(conv_124).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_126 = op::Conv2D("conv126").set_input_x(conv_125).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_127 = op::Conv2D("conv127").set_input_x(conv_126).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_128 = op::Conv2D("conv128").set_input_x(conv_127).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_129 = op::Conv2D("conv129").set_input_x(conv_128).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-  auto conv_130 = op::Conv2D("conv130").set_input_x(conv_129).set_input_filter(weight).set_attr_pads({0,0,0,0}).set_attr_strides({1,1,1,1});
-
-  std::vector<Operator> inputs{data};
-  std::vector<Operator> outputs{conv_130};
-  graph.SetInputs(inputs).SetOutputs(outputs);
-}
-
-int GetDatTypeSize(DataType dt) {
-  int dailation = 1;
-  if (dt == ge::DT_FLOAT)
-    dailation = 4;
-  else if (dt == ge::DT_FLOAT16)
-    dailation = 2;
-  else if (dt == ge::DT_INT16)
-    dailation = 2;
-  else if (dt == ge::DT_UINT16)
-    dailation = 2;
-  else if (dt == ge::DT_INT32)
-    dailation = 4;
-  else if (dt == ge::DT_UINT32)
-    dailation = 4;
-  else if (dt == ge::DT_INT64)
-    dailation = 8;
-  else if (dt == ge::DT_UINT64)
-    dailation = 8;
-  else if (dt == ge::DT_INT8)
-    dailation = 1;
-
-  return dailation;
-}
-
-int buildConvGraph_new(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var, int flag,
-                       Format format) {
-  auto data_x_shape = op::Data("xShape").set_attr_index(0);
-  auto var = op::Variable(name_var[0]);
-  auto var1 = op::Variable(name_var[1]);    //add one seat of ApplyMomentum()
-  auto label1 = op::Variable(name_var[2]);  //add one seat of ApplyMomentum()
-  auto conv2dgrad = op::Conv2DBackpropFilterD("output_1");
-  auto test2 = op::ApplyMomentum();
-
-  var.update_output_desc_y(desc_var[0]);
-  var1.update_output_desc_y(desc_var[1]);
-  label1.update_output_desc_y(desc_var[2]);
-
-  graph.AddOp(var);
-  graph.AddOp(var1);
-  graph.AddOp(label1);
-
-  auto conv2d = op::Conv2D().set_input_x(data_x_shape).set_input_filter(var).set_attr_strides({1, 1, 1, 1}).set_attr_pads({0,0,0,0});
-  update_op_format(conv2d, format);
-  ge::TensorDesc tensor_desc_w = conv2d.GetInputDesc("filter");
-  tensor_desc_w.SetFormat(format);
-  conv2d.UpdateInputDesc("filter", tensor_desc_w);
-
-  if (flag >= 1) {
-    conv2dgrad.set_input_x(data_x_shape)
-        .set_attr_filter_size(desc_var[0].GetShape().GetDims())
-        .set_input_out_backprop(conv2d)
-        .set_attr_strides({1, 1, 1, 1})
-        .set_attr_pads({0, 0, 0, 0});
-    update_op_format(conv2dgrad, format);
-    graph.AddOp(conv2dgrad);
-  }
-  if (flag >= 2) {
-    // set conv2dgrad var
-    test2.set_input_accum(var1)
-        .set_input_grad(conv2dgrad)
-        .set_input_lr(label1)
-        .set_input_momentum(label1)
-        .set_input_var(var);
-    graph.AddOp(test2);
-  }
-
-  std::vector<Operator> inputs{data_x_shape};  // set all val
-  std::vector<Operator> outputs{conv2d};
-  graph.SetInputs(inputs).SetOutputs(outputs);
-  graph.AddOp(conv2d);
-
-  return 0;
-}
-
-/// load bin data_fail
-/// input_path: path of bin data_file
-/// shapes: the shape of Tensor
-/// ft: the format of Tensor
-/// dt: the dataType of Tensor
-Tensor load_variable_input_data(string input_path, std::vector<int64_t> shapes, Format ft, DataType dt) {
-  vector<uint64_t> dim_info1;
-
-  uint8_t *input_data = (uint8_t *)readTestDataFile(input_path, dim_info1);  // common.h
-  TensorDesc input_tensor_desc = TensorDesc(ge::Shape(shapes), ft, dt);
-  input_tensor_desc.SetRealDimCnt(shapes.size());
-  Tensor input_tensor = Tensor(input_tensor_desc, input_data, GetDatTypeSize(dt) * dim_info1[dim_info1[0] + 1]);
-  return input_tensor;
-}
diff --git a/tests/st/resnet50/common.h b/tests/st/resnet50/common.h
deleted file mode 100644
index 75805db7..00000000
--- a/tests/st/resnet50/common.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ST_RESNET50_GE_COMMON_H_
-#define ST_RESNET50_GE_COMMON_H_
-#include "common/ge_inner_error_codes.h"
-#include "utils/tensor_utils.h"
-
-#define MY_USER_GE_LOGI(...) GE_LOG_INFO(1, __VA_ARGS__)
-#define MY_USER_GE_LOGW(...) GE_LOG_WARN(1, __VA_ARGS__)
-#define MY_USER_GE_LOGE(...) GE_LOG_ERROR(1, 3, __VA_ARGS__)
-
-#ifndef USER_GE_LOGI
-#define USER_GE_LOGI MY_USER_GE_LOGI
-#endif  // USER_GE_LOGI
-
-#ifndef USER_GE_LOGW
-#define USER_GE_LOGW MY_USER_GE_LOGW
-#endif  // USER_GE_LOGW
-
-#ifndef USER_GE_LOGE
-#define USER_GE_LOGE MY_USER_GE_LOGE
-#endif  // USER_GE_LOGE
-
-/// train_flag is 0 when infer, train_flag is 1 when train.this param is set for RunGranph_readData() and
-/// RunGraph_initData()
-#define TRAIN_FLAG_INFER "infer"
-#define TRAIN_FLAG_TRAIN "train"
-
-#include <string.h>
-#include <unistd.h>
-#include <algorithm>
-#include <chrono>
-#include <iostream>
-#include <thread>
-#include <vector>
-
-#include "ge_api.h"
-#include "graph.h"
-#include "ptest.h"
-#include "ops/all_ops.h"
-using namespace std;
-using namespace ge;
-
-// read bin file and compile result
-void update_op_format(Operator ops, Format format = ge::FORMAT_NCHW);
-void getDimInfo(FILE *fp, std::vector<uint64_t> &dim_info);
-void *readTestDataFile(std::string infile, std::vector<uint64_t> &dim_info);
-void *readUint8TestDataFile(std::string infile, int size);
-bool allclose(float *a, float *b, uint64_t count, float rtol, float atol);
-bool compFp32WithTData(float *actual_output_data, std::string expected_data_file, float rtol, float atol);
-Tensor load_variable_input_data(string input_path, std::vector<int64_t> shapes, Format ft = ge::FORMAT_NCHW,
-                                DataType dt = ge::DT_FLOAT);
-// constructor Tensor
-int GetDatTypeSize(DataType dt);
-ge::Tensor genTensor(std::vector<int64_t> tensor_shape, Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT);
-ge::Tensor genTensor_withVaule(std::vector<int64_t> tensor_shape, float value = 1);
-Tensor genTesnor_Shape_as_data(std::vector<int64_t> tensor_shape);
-// Init GE
-ge::Status GEInitialize_api(string train_flag = "0", string run_mode_path = "0");
-ge::Status GEInitialize_api_new(string train_flag = "infer", string run_mode = "fe");
-ge::Status GEFinalize_api();
-// constructor session and build graph
-ge::Session *create_aipp_session();
-ge::Session *create_session();
-ge::Status session_add_and_run_graph(ge::Session *session, uint32_t graphId, Graph &graph, std::vector<Tensor> inputs,
-                                     std::vector<Tensor> &outputs);
-
-// common interface for infer
-int RunGraph_initData(Graph &graph, string op_name, map<string, std::vector<int64_t>> attr_test,
-                      string train_flag = "infer", string run_mode_path = "fe");
-void Inputs_load_Data(string op_name, std::vector<Tensor> &input, map<string, std::vector<int64_t>> attr_test,
-                      Format format = ge::FORMAT_NCHW, DataType dt = ge::DT_FLOAT);
-bool comparaData(std::vector<Tensor> &output, string op_name, map<string, std::vector<int64_t>> attr_test);
-int RunGraph_readData(Graph &graph, string op_name, map<string, std::vector<int64_t>> attr_test,
-                      string train_flag = "infer", string run_mode_path = "fe", Format format = ge::FORMAT_NCHW,
-                      DataType dt = ge::DT_FLOAT);
-
-// common interface for train
-int buildCheckPointGraph(Graph &graph, map<string, TensorDesc> variables);
-int buildInitGraph(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var,
-                   std::vector<float> values_var);
-int buildInitGraph_other_dataType(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var);
-
-bool build_multi_input_multi_output_graph(Graph &graph);
-void build_big_graph(Graph &graph, map<string, std::vector<int64_t>> attr);
-int buildConvGraph_new(Graph &graph, std::vector<TensorDesc> desc_var, std::vector<std::string> name_var, int flag = 2);
-
-#endif  // ST_RESNET50_GE_COMMON_H_
diff --git a/tests/st/resnet50/ptest.h b/tests/st/resnet50/ptest.h
deleted file mode 100644
index 568969f8..00000000
--- a/tests/st/resnet50/ptest.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ST_RESNET50_PTEST_H_
-#define ST_RESNET50_PTEST_H_
-
-#include <stdarg.h>
-#include <string.h>
-#include <exception>
-#include <functional>
-#include <iostream>
-#include <list>
-#include <map>
-#include <memory>
-#include <string>
-
-namespace ptest {
-class assertion_error : public std::exception {
- public:
-  const char *what() const throw() { return "Assertion Exception"; }
-};
-
-class TestFixture {
- public:
-  virtual void SetUp() {}
-  virtual void TearDown() {}
-  void Run() { _func(); }
-  void BindFunction(std::function<void(void)> function) { _func = function; }
-  void SetName(const std::string &name) { _name = name; }
-  std::string Name() const { return _name; }
-  virtual ~TestFixture() {}
-
- private:
-  std::function<void(void)> _func;
-  std::string _name;
-};
-
-enum TestResult { SUCCESS, FAILED, UNAVAILABLE, UNKNOWN, NOCASEFOUND };
-
-class TestManager {
- public:
-  static TestManager &GetSingleton() {
-    static TestManager instance;
-    return instance;
-  }
-  void RegisterTest(const std::string &name, TestFixture *fixture) { _testfixtures[name] = fixture; }
-
-  const std::string GetRunningTestcaseName() const { return _running_testcase_name; }
-
-  const std::list<std::string> GetAllTestNames() const {
-    std::list<std::string> result;
-    for (auto &t : _testfixtures) {
-      result.push_back(t.first);
-    }
-    return result;
-  }
-
-  TestResult RunTest(const std::string &name) {
-    if (_testfixtures.find(name) == _testfixtures.end()) {
-      return NOCASEFOUND;
-    }
-
-    _running_testcase_name = name;
-
-    do {
-      SetTestResult(name, UNKNOWN);
-      _testfixtures[name]->SetUp();
-      if (_testresults[name] == FAILED) {
-        _testresults[name] = UNAVAILABLE;
-        break;
-      }
-      SetTestResult(name, SUCCESS);
-      try {
-        _testfixtures[name]->Run();
-      } catch (assertion_error &e) {
-        // Do nothing as the error has been handled by the TestManager.
-      }
-      _testfixtures[name]->TearDown();
-    } while (0);
-
-    return _testresults[name];
-  }
-  void SetTestResult(const std::string &name, TestResult result) { _testresults[name] = result; }
-  TestResult GetTestResult(const std::string &name) { return _testresults[name]; }
-
- private:
-  std::map<std::string, TestFixture *> _testfixtures;
-  std::map<std::string, TestResult> _testresults;
-  std::string _running_testcase_name;
-};
-
-class TestFixtureRegister {
- public:
-  TestFixtureRegister(const std::string &name, TestFixture *fixture, std::function<void(void)> function) {
-    fixture->BindFunction(function);
-    fixture->SetName(name);
-    TestManager::GetSingleton().RegisterTest(name, fixture);
-  }
-};
-}  // namespace ptest
-
-#define _STR(x) #x
-#define _EMPTY_NAMESPACE
-
-#define _TEST(NAMESPACE, FIXTURECLASS, TESTNAME, CASENAME)                                              \
-  void g_func_##TESTNAME##_##CASENAME(void);                                                            \
-  NAMESPACE::FIXTURECLASS g_fixture_##TESTNAME##_##CASENAME;                                            \
-  ptest::TestFixtureRegister g_register_##TESTNAME##_##CASENAME(                                        \
-      _STR(TESTNAME##_##CASENAME), &g_fixture_##TESTNAME##_##CASENAME, g_func_##TESTNAME##_##CASENAME); \
-  void g_func_##TESTNAME##_##CASENAME(void)
-
-#define TEST(TESTNAME, CASENAME) _TEST(ptest, TestFixture, TESTNAME, CASENAME)
-
-#define TEST_F(TESTFIXTURE, CASENAME) _TEST(_EMPTY_NAMESPACE, TESTFIXTURE, TESTFIXTURE, CASENAME)
-
-#define EXPECT_TRUE(X)                                                                    \
-  do {                                                                                    \
-    if (!(X)) {                                                                           \
-      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \
-      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED);          \
-      std::cerr << #X << "Expectation Failed\n"                                           \
-                << "Testcase Name: " << test_name << "\n"                                  \
-                << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl;               \
-    }                                                                                     \
-  } while (0);
-
-// With the macro definition ensures that the compiler can detect compiler warning.
-#define Max_Log_Len 1024
-#define PRINT_ERR(lpszFormat, ...)                              \
-  do {                                                          \
-    char szTmpBuf[Max_Log_Len + 1] = {0};                       \
-    snprintf(szTmpBuf, Max_Log_Len, lpszFormat, ##__VA_ARGS__); \
-    std::cerr << szTmpBuf << std::endl;                         \
-  } while (0)
-
-// Increase the content of print error messages and error to facilitate rapid analysis
-#define EXPECT_TRUE_C(X, ERR_TYPE, format, ...)                                                             \
-  do {                                                                                                      \
-    if (!(X)) {                                                                                             \
-      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName();                   \
-      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED);                            \
-      std::cerr << #X << " Expectation Failed."                                                             \
-                << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \
-      PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__);                                                    \
-    }                                                                                                       \
-  } while (0)
-
-#define ASSERT_TRUE(X)                                                                    \
-  do {                                                                                    \
-    if (!(X)) {                                                                           \
-      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName(); \
-      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED);          \
-      std::cerr << #X << "Assertion Failed\n"                                             \
-                << "Testcase Name: " << test_name << "\n"                                  \
-                << "File: " __FILE__ << "\tLine:" << __LINE__ << std::endl;               \
-      throw ptest::assertion_error();                                                     \
-    }                                                                                     \
-  } while (0);
-
-// Add printing error information and error line content for quick analysis
-#define ASSERT_TRUE_C(X, ERR_TYPE, format, ...)                                                             \
-  do {                                                                                                      \
-    if (!(X)) {                                                                                             \
-      std::string test_name = ptest::TestManager::GetSingleton().GetRunningTestcaseName();                   \
-      ptest::TestManager::GetSingleton().SetTestResult(test_name, ptest::FAILED);                            \
-      std::cerr << #X << " Assertion Failed."                                                               \
-                << "Testcase Name: " << test_name << " File:" __FILE__ << " Line:" << __LINE__ << std::endl; \
-      PRINT_ERR("[" ERR_TYPE "]" format, ##__VA_ARGS__);                                                    \
-      throw ptest::assertion_error();                                                                       \
-    }                                                                                                       \
-  } while (0);
-
-#define CONFIG_ERR "CONFIG_ERR"
-#define LOAD_MODEL_ERR "LOAD_MODEL_ERR"
-#define FILE_READ_ERR "FILE_READ_ERR"
-#define RUN_ERROR "RUN_ERROR"
-#define MEM_ERROR "MEM_ERROR"
-#define RESULT_ERR "RESULT_ERR"
-
-#define EXPECT_FALSE(X) EXPECT_TRUE(!(X))
-#define EXPECT_EQ(X, Y) EXPECT_TRUE(((X) == (Y)))
-#define EXPECT_NE(X, Y) EXPECT_TRUE(((X) != (Y)))
-#define EXPECT_GT(X, Y) EXPECT_TRUE(((X) > (Y)))
-#define EXPECT_GE(X, Y) EXPECT_TRUE(((X) >= (Y)))
-#define EXPECT_LT(X, Y) EXPECT_TRUE(((X) < (Y)))
-#define EXPECT_LE(X, Y) EXPECT_TRUE(((X) <= (Y)))
-
-#define EXPECT_FALSE_C(X, ERR_TYPE, format, ...) EXPECT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__)
-#define EXPECT_EQ_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define EXPECT_NE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define EXPECT_GT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define EXPECT_GE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define EXPECT_LT_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define EXPECT_LE_C(X, Y, ERR_TYPE, format, ...) EXPECT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-
-#define ASSERT_FALSE(X) ASSERT_TRUE(!(X))
-#define ASSERT_EQ(X, Y) ASSERT_TRUE(((X) == (Y)))
-#define ASSERT_NE(X, Y) ASSERT_TRUE(((X) != (Y)))
-#define ASSERT_GT(X, Y) ASSERT_TRUE(((X) > (Y)))
-#define ASSERT_GE(X, Y) ASSERT_TRUE(((X) >= (Y)))
-#define ASSERT_LT(X, Y) ASSERT_TRUE(((X) < (Y)))
-#define ASSERT_LE(X, Y) ASSERT_TRUE(((X) <= (Y)))
-
-#define ASSERT_FALSE_C(X, ERR_TYPE, format, ...) ASSERT_TRUE_C(!(X), ERR_TYPE, format, ##__VA_ARGS__)
-#define ASSERT_EQ_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) == (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define ASSERT_NE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) != (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define ASSERT_GT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) > (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define ASSERT_GE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) >= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define ASSERT_LT_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) < (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-#define ASSERT_LE_C(X, Y, ERR_TYPE, format, ...) ASSERT_TRUE_C(((X) <= (Y)), ERR_TYPE, format, ##__VA_ARGS__)
-
-#endif  // ST_RESNET50_PTEST_H_
diff --git a/tests/st/resnet50/resnet50_train.cc b/tests/st/resnet50/resnet50_train.cc
deleted file mode 100644
index f1d1e58d..00000000
--- a/tests/st/resnet50/resnet50_train.cc
+++ /dev/null
@@ -1,852 +0,0 @@
-﻿/**
- * Copyright 2019-2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <assert.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <algorithm>
-#include <chrono>
-#include <ctime>
-#include <sstream>
-
-#include "common.h"
-#include "ge_api.h"
-#include "graph.h"
-#include "ops/all_ops.h"
-#include "types.h"
-#include "utils/tensor_utils.h"
-
-using namespace std;
-using namespace ge;
-using namespace op;
-
-typedef bool (*Func)(Graph &graph);
-
-#define PADDING_MODE 6
-#define GRAD_PADDING_MODE 3
-vector<int64_t> pad_1{1, 1, 1, 1};
-vector<int64_t> pad_0{0, 0, 0, 0};
-vector<int64_t> stride_1{1, 1};
-vector<int64_t> stride_2{2, 2};
-
-// (int out_channels, int h, int w, vector<uint_64> stride{1,1}, vector<uint_64> pad{1,1,1,1}, op::Data() input)
-#define GENERATE_CONV_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input)                     \
-  auto &LAYER##_##BLK##_##OPNUM##_input = input;                                                                      \
-                                                                                                                      \
-  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT);     \
-  auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight");   \
-  LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                              \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_mom_weight =                                                                         \
-      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight");                                   \
-  LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                          \
-  LAYER##_##BLK##_##OPNUM##_mom_weight.update_input_desc_x(LAYER##_##BLK##_##OPNUM##_desc);                           \
-                                                                                                                      \
-  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) << "'s weight shape is:" << in_channels << out_channels << h \
-       << w << endl;                                                                                                  \
-  cout << string(#LAYER) + string(#BLK) + string(#OPNUM)                                                              \
-       << "'s input_x op's shape is:" << input.GetOutputDesc("y").GetShape().GetDim(2) << endl;                       \
-  auto LAYER##_##BLK##_##OPNUM##_tmp_dims = input.GetOutputDesc("y").GetShape().GetDims();                            \
-  for (auto LAYER##_##BLK##_##OPNUM##_tmp_it = LAYER##_##BLK##_##OPNUM##_tmp_dims.begin();                            \
-       LAYER##_##BLK##_##OPNUM##_tmp_it != LAYER##_##BLK##_##OPNUM##_tmp_dims.end();                                  \
-       LAYER##_##BLK##_##OPNUM##_tmp_it++) {                                                                          \
-    cout << *LAYER##_##BLK##_##OPNUM##_tmp_it;                                                                        \
-  }                                                                                                                   \
-  cout << endl;                                                                                                       \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM = op::Conv2D(string(#LAYER) + string(#BLK) + string(#OPNUM))                           \
-                                     .set_input_x(input, "y")                                                         \
-                                     .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight)                              \
-                                     .set_attr_strides({1, 1, stride[0], stride[1]})                                  \
-                                     .set_attr_pads(pad)                                                              \
-                                     .set_attr_data_format("NCHW");                                                   \
-  update_op_format(LAYER##_##BLK##_##OPNUM);
-
-#define GENERATE_CONSTANT(LAYER, BLK, OPNUM, CONSTNAME)                                                           \
-  Tensor LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor;                                                          \
-  float *LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data = new float[LAYER##_##BLK##_##OPNUM##_size];                \
-  for (int i = 0; i < (int)LAYER##_##BLK##_##OPNUM##_size; i++) {                                                 \
-    *(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data + i) = 0.01;                                                   \
-  }                                                                                                               \
-  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetData((uint8_t *)LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data, \
-                                                         LAYER##_##BLK##_##OPNUM##_size * sizeof(float));         \
-  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor.SetTensorDesc(LAYER##_##BLK##_##OPNUM##_desc);                   \
-                                                                                                                  \
-  auto LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant =                                                         \
-      op::Constant().set_attr_value(LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_tensor);                              \
-  LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_constant.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);          \
-  delete[] LAYER##_##BLK##_##OPNUM##_##CONSTNAME##_data;
-
-#define GENERATE_CONV_VAR_VAR(LAYER, BLK, OPNUM, in_channels, out_channels, h, w, stride, pad, input)               \
-  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({out_channels, in_channels, h, w}), FORMAT_NCHW, DT_FLOAT);   \
-  uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize();               \
-  auto LAYER##_##BLK##_##OPNUM##_weight = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_weight"); \
-  LAYER##_##BLK##_##OPNUM##_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                            \
-                                                                                                                    \
-  auto LAYER##_##BLK##_##OPNUM##_mom_weight =                                                                       \
-      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_weight");                                 \
-  LAYER##_##BLK##_##OPNUM##_mom_weight.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                        \
-                                                                                                                    \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, weight);                                                                     \
-  auto LAYER##_##BLK##_##OPNUM##_weight_assign = op::Assign()                                                       \
-                                                     .set_input_ref(LAYER##_##BLK##_##OPNUM##_weight)               \
-                                                     .set_input_value(LAYER##_##BLK##_##OPNUM##_weight_constant);   \
-                                                                                                                    \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_weight);                                                                 \
-  auto LAYER##_##BLK##_##OPNUM##_mom_weight_assign =                                                                \
-      op::Assign()                                                                                                  \
-          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_weight)                                                      \
-          .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_weight_constant);                                          \
-                                                                                                                    \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_weight);                                                                \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_weight);
-
-// (int out_channels, Operator& input)
-#define GENERATE_BN_VAR(LAYER, BLK, OPNUM, out_channels, input)                                                   \
-  auto &LAYER##_##BLK##_##OPNUM##_input = input;                                                                  \
-                                                                                                                  \
-  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT);           \
-  auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale"); \
-  LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                           \
-                                                                                                                  \
-  auto LAYER##_##BLK##_##OPNUM##_mom_scale =                                                                      \
-      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale");                                \
-  LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                       \
-                                                                                                                  \
-  auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b");         \
-  LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                               \
-                                                                                                                  \
-  auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b"); \
-  LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                           \
-                                                                                                                  \
-  auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean");   \
-  LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                            \
-  auto LAYER##_##BLK##_##OPNUM##_variance =                                                                       \
-      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance");                                 \
-  LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                        \
-                                                                                                                  \
-  auto LAYER##_##BLK##_##OPNUM = op::FusedBatchNorm(string(#LAYER) + string(#BLK) + string(#OPNUM))               \
-                                     .set_input_x(input, "y")                                                     \
-                                     .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale)                            \
-                                     .set_input_b(LAYER##_##BLK##_##OPNUM##_b)                                    \
-                                     .set_input_mean(LAYER##_##BLK##_##OPNUM##_mean)                              \
-                                     .set_input_variance(LAYER##_##BLK##_##OPNUM##_variance)                      \
-                                     .set_attr_mode(1)                                                            \
-                                     .set_attr_epsilon(1e-5)                                                      \
-                                     .set_attr_is_training(true);
-
-#define GENERATE_BN_VAR_VAR(LAYER, BLK, OPNUM, out_channels, input)                                                   \
-  TensorDesc LAYER##_##BLK##_##OPNUM##_desc(ge::Shape({1, out_channels, 1, 1}), FORMAT_NCHW, DT_FLOAT);               \
-  uint32_t LAYER##_##BLK##_##OPNUM##_size = LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetShapeSize();                 \
-  auto LAYER##_##BLK##_##OPNUM##_scale = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_scale");     \
-  LAYER##_##BLK##_##OPNUM##_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                               \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_mom_scale =                                                                          \
-      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_scale");                                    \
-  LAYER##_##BLK##_##OPNUM##_mom_scale.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                           \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_b");             \
-  LAYER##_##BLK##_##OPNUM##_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                                   \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_mom_b = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mom_b");     \
-  LAYER##_##BLK##_##OPNUM##_mom_b.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                               \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_mean = op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_mean");       \
-  LAYER##_##BLK##_##OPNUM##_mean.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                                \
-  auto LAYER##_##BLK##_##OPNUM##_variance =                                                                           \
-      op::Variable(string(#LAYER) + string(#BLK) + string(#OPNUM) + "_variance");                                     \
-  LAYER##_##BLK##_##OPNUM##_variance.update_output_desc_y(LAYER##_##BLK##_##OPNUM##_desc);                            \
-                                                                                                                      \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, scale);                                                                        \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_scale_assign = op::Assign()                                                          \
-                                                    .set_input_ref(LAYER##_##BLK##_##OPNUM##_scale)                   \
-                                                    .set_input_value(LAYER##_##BLK##_##OPNUM##_scale_constant);       \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_scale);                                                                    \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_mom_scale_assign =                                                                   \
-      op::Assign()                                                                                                    \
-          .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_scale)                                                         \
-          .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_scale_constant);                                             \
-                                                                                                                      \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, b);                                                                            \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_b_assign =                                                                           \
-      op::Assign().set_input_ref(LAYER##_##BLK##_##OPNUM##_b).set_input_value(LAYER##_##BLK##_##OPNUM##_b_constant);  \
-                                                                                                                      \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mom_b);                                                                        \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_mom_b_assign = op::Assign()                                                          \
-                                                    .set_input_ref(LAYER##_##BLK##_##OPNUM##_mom_b)                   \
-                                                    .set_input_value(LAYER##_##BLK##_##OPNUM##_mom_b_constant);       \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, mean);                                                                         \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_mean_assign = op::Assign()                                                           \
-                                                   .set_input_ref(LAYER##_##BLK##_##OPNUM##_mean)                     \
-                                                   .set_input_value(LAYER##_##BLK##_##OPNUM##_mean_constant);         \
-                                                                                                                      \
-  GENERATE_CONSTANT(LAYER, BLK, OPNUM, variance);                                                                     \
-                                                                                                                      \
-  auto LAYER##_##BLK##_##OPNUM##_variance_assign = op::Assign()                                                       \
-                                                       .set_input_ref(LAYER##_##BLK##_##OPNUM##_variance)             \
-                                                       .set_input_value(LAYER##_##BLK##_##OPNUM##_variance_constant); \
-                                                                                                                      \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_scale);                                                                   \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_scale);                                                               \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_b);                                                                       \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_mom_b);                                                                   \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_mean);                                                                    \
-  input.push_back(LAYER##_##BLK##_##OPNUM##_variance);
-
-// (int out_channels, Operator& input)
-#define GENERATE_RELU_VAR(LAYER, BLK, OPNUM, input) \
-  auto &LAYER##_##BLK##_##OPNUM##_input = input;    \
-  auto LAYER##_##BLK##_##OPNUM = op::Relu(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x(input, "y");
-
-// (int out_channels, Operator& input)
-#define GENERATE_MAXPOOL_VAR(LAYER, BLK, OPNUM, input)                                                 \
-  auto &LAYER##_##BLK##_##OPNUM##_input = input;                                                       \
-                                                                                                       \
-  auto LAYER##_##BLK##_##OPNUM = op::MaxPoolWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM)) \
-                                     .set_input_x(input, "y")                                          \
-                                     .set_attr_ksize({1, 3, 3, 1})                                     \
-                                     .set_attr_padding("SAME")                                         \
-                                     .set_attr_strides({1, 2, 2, 1});
-
-// (int out_channels, Operator& input)
-#define GENERATE_ADD_VAR(LAYER, BLK, OPNUM, input_x1, input_x2) \
-  auto LAYER##_##BLK##_##OPNUM =                                \
-      op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM)).set_input_x1(input_x1, "y").set_input_x2(input_x2, "y");
-
-// (int in_channels, int out_channels,vector<int64_t> stride{1,1}, Operator& input)
-#define MAKE_RESIDUAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input)                                 \
-  auto &LAYER##_##BLK##_input = input;                                                                            \
-  auto &LAYER##_##BLK##_stride = stride;                                                                          \
-  int LAYER##_##BLK##_out_chls = out_channels / 4;                                                                \
-                                                                                                                  \
-  GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input);        \
-  GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1);                              \
-  GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1);                                                      \
-                                                                                                                  \
-  GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
-                    LAYER##_##BLK##_relu1);                                                                       \
-  GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2);                              \
-  GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2);                                                      \
-                                                                                                                  \
-  GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0,             \
-                    LAYER##_##BLK##_relu2);                                                                       \
-  GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3);                                          \
-                                                                                                                  \
-  GENERATE_CONV_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input);                    \
-  GENERATE_BN_VAR(LAYER, BLK, bn4, out_channels, LAYER##_##BLK##_conv4);                                          \
-                                                                                                                  \
-  GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, LAYER##_##BLK##_bn4);                                   \
-  GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5);                                                     \
-                                                                                                                  \
-  auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5;                                                           \
-  auto &LAYER##_##BLK##_output_label = "y";
-
-#define MAKE_RESIDUAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input)                                 \
-  int LAYER##_##BLK##_out_chls = out_channels / 4;                                                                    \
-  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input);        \
-  GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input);                                              \
-                                                                                                                      \
-  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
-                        input);                                                                                       \
-  GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input);                                              \
-                                                                                                                      \
-  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input);     \
-  GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input);                                                          \
-                                                                                                                      \
-  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv4, in_channels, out_channels, 1, 1, stride, pad_0, input);                    \
-  GENERATE_BN_VAR_VAR(LAYER, BLK, bn4, out_channels, input);
-
-// (int in_channels, int out_channels,vector<int64_t> stride{1,1}, Operator& input)
-#define MAKE_NORMAL_BLOCK(LAYER, BLK, in_channels, out_channels, stride, input)                                   \
-  auto &LAYER##_##BLK##_input = input;                                                                            \
-  auto &LAYER##_##BLK##_stride = stride;                                                                          \
-  int LAYER##_##BLK##_out_chls = out_channels / 4;                                                                \
-                                                                                                                  \
-  GENERATE_CONV_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input);        \
-  GENERATE_BN_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv1);                              \
-  GENERATE_RELU_VAR(LAYER, BLK, relu1, LAYER##_##BLK##_bn1);                                                      \
-                                                                                                                  \
-  GENERATE_CONV_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
-                    LAYER##_##BLK##_relu1);                                                                       \
-  GENERATE_BN_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_conv2);                              \
-  GENERATE_RELU_VAR(LAYER, BLK, relu2, LAYER##_##BLK##_bn2);                                                      \
-                                                                                                                  \
-  GENERATE_CONV_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0,             \
-                    LAYER##_##BLK##_relu2);                                                                       \
-  GENERATE_BN_VAR(LAYER, BLK, bn3, out_channels, LAYER##_##BLK##_conv3);                                          \
-                                                                                                                  \
-  GENERATE_ADD_VAR(LAYER, BLK, add5, LAYER##_##BLK##_bn3, input);                                                 \
-  GENERATE_RELU_VAR(LAYER, BLK, relu5, LAYER##_##BLK##_add5);                                                     \
-                                                                                                                  \
-  auto &LAYER##_##BLK##_output = LAYER##_##BLK##_relu5;                                                           \
-  auto &LAYER##_##BLK##_output_label = "y";
-
-#define MAKE_NORMAL_BLOCK_VAR(LAYER, BLK, in_channels, out_channels, stride, input)                                   \
-  int LAYER##_##BLK##_out_chls = out_channels / 4;                                                                    \
-  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv1, in_channels, LAYER##_##BLK##_out_chls, 1, 1, stride, pad_0, input);        \
-  GENERATE_BN_VAR_VAR(LAYER, BLK, bn1, LAYER##_##BLK##_out_chls, input);                                              \
-                                                                                                                      \
-  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv2, LAYER##_##BLK##_out_chls, LAYER##_##BLK##_out_chls, 3, 3, stride_1, pad_1, \
-                        input);                                                                                       \
-  GENERATE_BN_VAR_VAR(LAYER, BLK, bn2, LAYER##_##BLK##_out_chls, input);                                              \
-                                                                                                                      \
-  GENERATE_CONV_VAR_VAR(LAYER, BLK, conv3, LAYER##_##BLK##_out_chls, out_channels, 1, 1, stride_1, pad_0, input);     \
-  GENERATE_BN_VAR_VAR(LAYER, BLK, bn3, out_channels, input);
-
-// (int in_channels, int out_channels,vector<int64_t> stride{1,1}, Operator& input)
-#define MAKE_RESIDUAL_LAYER(LAYER, in_channels, out_channels, stride, input)  \
-  MAKE_RESIDUAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \
-                                                                              \
-  auto &LAYER##_output = LAYER##_blk1_output;                                 \
-  auto &LAYER##_output_label = LAYER##_blk1_output_label;
-
-#define MAKE_RESIDUAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \
-  MAKE_RESIDUAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input);
-
-// (int in_channels, int out_channels,vector<int64_t> stride{1,1}, Operator& input)
-#define MAKE_NORMAL_LAYER(LAYER, in_channels, out_channels, stride, input)  \
-  MAKE_NORMAL_BLOCK(LAYER, blk1, in_channels, out_channels, stride, input); \
-                                                                            \
-  auto &LAYER##_output = LAYER##_blk1_output;                               \
-  auto &LAYER##_output_label = LAYER##_blk1_output_label;
-
-#define MAKE_NORMAL_LAYER_VAR(LAYER, in_channels, out_channels, stride, input) \
-  MAKE_NORMAL_BLOCK_VAR(LAYER, blk1, in_channels, out_channels, stride, input);
-
-#define MAKE_RESNET50(input)                                         \
-  MAKE_RESIDUAL_LAYER(layer1, 64, 256, stride_1, input)              \
-  MAKE_NORMAL_LAYER(layer2, 256, 256, stride_1, layer1_output)       \
-  MAKE_NORMAL_LAYER(layer3, 256, 256, stride_1, layer2_output)       \
-  MAKE_RESIDUAL_LAYER(layer4, 256, 512, stride_2, layer3_output)     \
-  MAKE_NORMAL_LAYER(layer5, 512, 512, stride_1, layer4_output)       \
-  MAKE_NORMAL_LAYER(layer6, 512, 512, stride_1, layer5_output)       \
-  MAKE_NORMAL_LAYER(layer7, 512, 512, stride_1, layer6_output)       \
-  MAKE_RESIDUAL_LAYER(layer8, 512, 1024, stride_2, layer7_output)    \
-  MAKE_NORMAL_LAYER(layer9, 1024, 1024, stride_1, layer8_output)     \
-  MAKE_NORMAL_LAYER(layer10, 1024, 1024, stride_1, layer9_output)    \
-  MAKE_NORMAL_LAYER(layer11, 1024, 1024, stride_1, layer10_output)   \
-  MAKE_NORMAL_LAYER(layer12, 1024, 1024, stride_1, layer11_output)   \
-  MAKE_NORMAL_LAYER(layer13, 1024, 1024, stride_1, layer12_output)   \
-  MAKE_RESIDUAL_LAYER(layer14, 1024, 2048, stride_2, layer13_output) \
-  MAKE_NORMAL_LAYER(layer15, 2048, 2048, stride_1, layer14_output)   \
-  MAKE_NORMAL_LAYER(layer16, 2048, 2048, stride_1, layer15_output)   \
-                                                                     \
-  auto &resnet50_output = layer16_output;                            \
-  auto &resnet50_output_label = layer16_output_label;
-
-#define MAKE_RESNET50_VAR(inputs)                                \
-  MAKE_RESIDUAL_LAYER_VAR(layer1, 64, 256, stride_1, inputs)     \
-  MAKE_NORMAL_LAYER_VAR(layer2, 256, 256, stride_1, inputs)      \
-  MAKE_NORMAL_LAYER_VAR(layer3, 256, 256, stride_1, inputs)      \
-  MAKE_RESIDUAL_LAYER_VAR(layer4, 256, 512, stride_2, inputs)    \
-  MAKE_NORMAL_LAYER_VAR(layer5, 512, 512, stride_1, inputs)      \
-  MAKE_NORMAL_LAYER_VAR(layer6, 512, 512, stride_1, inputs)      \
-  MAKE_NORMAL_LAYER_VAR(layer7, 512, 512, stride_1, inputs)      \
-  MAKE_RESIDUAL_LAYER_VAR(layer8, 512, 1024, stride_2, inputs)   \
-  MAKE_NORMAL_LAYER_VAR(layer9, 1024, 1024, stride_1, inputs)    \
-  MAKE_NORMAL_LAYER_VAR(layer10, 1024, 1024, stride_1, inputs)   \
-  MAKE_NORMAL_LAYER_VAR(layer11, 1024, 1024, stride_1, inputs)   \
-  MAKE_NORMAL_LAYER_VAR(layer12, 1024, 1024, stride_1, inputs)   \
-  MAKE_NORMAL_LAYER_VAR(layer13, 1024, 1024, stride_1, inputs)   \
-  MAKE_RESIDUAL_LAYER_VAR(layer14, 1024, 2048, stride_2, inputs) \
-  MAKE_NORMAL_LAYER_VAR(layer15, 2048, 2048, stride_1, inputs)   \
-  MAKE_NORMAL_LAYER_VAR(layer16, 2048, 2048, stride_1, inputs)   \
-//---------------------------------------------------------------------------------------------
-
-// (Operator& input)
-#define GENERATE_BIASADD_GRAD(LAYER, BLK, OPNUM, input)                                \
-  auto LAYER##_##BLK##_##OPNUM##_grad =                                                \
-      op::BiasAddGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
-          .set_input_x(input, input.name_out_dx());
-
-// (Operator& input)
-#define GENERATE_MATMUL_GRAD(LAYER, BLK, OPNUM, input) \
-  auto LAYER##_##BLK##_##OPNUM##_grad =                \
-      op::MatMul(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_x1(input);
-
-// (Operator& input)
-#define GENERATE_RESHAPE_GRAD(LAYER, BLK, OPNUM, input) \
-  auto LAYER##_##BLK##_##OPNUM##_grad =                 \
-      op::Reshape(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")).set_input_tensor(input);
-
-// (Operator& input_grad, Operator& input_maxpool)
-#define GENERATE_MAXPOOL_GRAD(LAYER, BLK, OPNUM, input_grad, input_maxpool)                      \
-  auto LAYER##_##BLK##_##OPNUM##_grad =                                                          \
-      op::MaxPoolGradWithArgmax(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
-          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y")                                     \
-          .set_input_grad(input_grad)                                                            \
-          .set_input_argmax(input_maxpool, input_maxpool.name_out_argmax())                      \
-          .set_attr_ksize({1, 1, 3, 3})                                                          \
-          .set_attr_strides({1, 1, 2, 2})                                                        \
-          .set_attr_padding("SAME");
-
-// (Operator& input_dy)
-#define GENERATE_RELU_GRAD(LAYER, BLK, OPNUM, input_dy, dy_label)                                                     \
-  auto LAYER##_##BLK##_##OPNUM##_grad = op::ReluGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
-                                            .set_input_gradients(input_dy, dy_label)                                  \
-                                            .set_input_features(LAYER##_##BLK##_##OPNUM, "y");
-
-// (Operator& input_dy)
-#define GENERATE_BN_GRAD(LAYER, BLK, OPNUM, input_dy)                                                         \
-  auto LAYER##_##BLK##_##OPNUM##_grad =                                                                       \
-      op::FusedBatchNormGrad(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad"))                 \
-          .set_input_dy(input_dy, "backprops")                                                                \
-          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y")                                                  \
-          .set_input_scale(LAYER##_##BLK##_##OPNUM##_scale)                                                   \
-          .set_input_save_mean(LAYER##_##BLK##_##OPNUM, "save_mean")                                          \
-          .set_input_save_inv_variance(LAYER##_##BLK##_##OPNUM, "save_inv_variance")                          \
-          .set_attr_epsilon(0.0001);                                                                          \
-                                                                                                              \
-  auto LAYER##_##BLK##_##OPNUM##_momentum_scale =                                                             \
-      op::ApplyMomentum()                                                                                     \
-          .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_scale)                                               \
-          .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_scale()) \
-          .set_input_lr(label1)                                                                               \
-          .set_input_momentum(label1)                                                                         \
-          .set_input_var(LAYER##_##BLK##_##OPNUM##_scale);                                                    \
-                                                                                                              \
-  auto LAYER##_##BLK##_##OPNUM##_momentum_b =                                                                 \
-      op::ApplyMomentum()                                                                                     \
-          .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_b)                                                   \
-          .set_input_grad(LAYER##_##BLK##_##OPNUM##_grad, LAYER##_##BLK##_##OPNUM##_grad.name_out_bn_bias())  \
-          .set_input_lr(label1)                                                                               \
-          .set_input_momentum(label1)                                                                         \
-          .set_input_var(LAYER##_##BLK##_##OPNUM##_b);
-
-// (Operator& input)
-#define GENERATE_CONV_PROP_FILTER(LAYER, BLK, OPNUM, input_bngrad, stride)                                    \
-  auto LAYER##_##BLK##_##OPNUM##_propfilter =                                                                 \
-      op::Conv2DBackpropFilterD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propfilter"))       \
-          .set_input_x(LAYER##_##BLK##_##OPNUM##_input, "y")                                                  \
-          .set_attr_filter_size(LAYER##_##BLK##_##OPNUM##_desc.GetShape().GetDims())                          \
-          .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx())                                   \
-          .set_attr_strides(stride)                                                                           \
-          .set_attr_pads({1, 1, 1, 1});                                                                       \
-                                                                                                              \
-  update_op_format(LAYER##_##BLK##_##OPNUM##_propfilter);                                                     \
-  auto LAYER##_##BLK##_##OPNUM##_momentum_weight = op::ApplyMomentum()                                        \
-                                                       .set_input_accum(LAYER##_##BLK##_##OPNUM##_mom_weight) \
-                                                       .set_input_grad(LAYER##_##BLK##_##OPNUM##_propfilter)  \
-                                                       .set_input_lr(label1)                                  \
-                                                       .set_input_momentum(label1)                            \
-                                                       .set_input_var(LAYER##_##BLK##_##OPNUM##_weight);
-
-///.set_attr_input_size({input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(0),LAYER##_##BLK##_##OPNUM##_weight.GetOutputDesc().GetShape().GetDim(1),
-///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(2)*stride[2],
-///input_bngrad.name_out_dx().GetOutputDesc().GetShape().GetDim(3)*stride[3]})
-#define GENERATE_CONV_PROP_INPUT(LAYER, BLK, OPNUM, input_bngrad, stride)                                           \
-  auto LAYER##_##BLK##_##OPNUM##_propinput =                                                                        \
-      op::Conv2DBackpropInputD(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("_propinput"))               \
-          .set_attr_input_size(LAYER##_##BLK##_##OPNUM##_input.GetOutputDesc("y").GetShape().GetDims())             \
-          .set_input_filter(LAYER##_##BLK##_##OPNUM##_weight)                                                       \
-          .set_input_out_backprop(input_bngrad, input_bngrad.name_out_dx())                                         \
-          .set_attr_strides(stride)                                                                                 \
-          .set_attr_pads({1, 1, 1, 1});                                                                             \
-  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput"                                             \
-       << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(3) * stride[3] << endl; \
-  cout << string(#LAYER) + string(#BLK) + string(#OPNUM) + "_propinput"                                             \
-       << "'s input_x op's shape is:" << input_bngrad.GetOutputDesc("dx").GetShape().GetDim(2) * stride[2] << endl; \
-                                                                                                                    \
-  update_op_format(LAYER##_##BLK##_##OPNUM##_propinput);                                                            \
-  auto &LAYER##_##BLK##_##OPNUM##_propinput_label = "y"
-
-// (int out_channels, Operator& input)
-#define GENERATE_ADD_GRAD(LAYER, BLK, OPNUM, input_x1, input_x1_label, input_x2, input_x2_label)                 \
-  auto LAYER##_##BLK##_##OPNUM##_grad = op::Add(string(#LAYER) + string(#BLK) + string(#OPNUM) + string("grad")) \
-                                            .set_input_x1(input_x1, input_x1_label)                              \
-                                            .set_input_x2(input_x2, input_x2_label);
-
-// (Operator& input)
-#define MAKE_RESIDUAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label)                                              \
-  GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label);                                                  \
-                                                                                                              \
-  GENERATE_BN_GRAD(LAYER, BLK, bn4, LAYER##_##BLK##_relu5_grad);                                              \
-  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride);             \
-  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv4, LAYER##_##BLK##_bn4_grad, LAYER##_##BLK##_stride);              \
-                                                                                                              \
-  GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad);                                              \
-  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1);                           \
-  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1);                            \
-                                                                                                              \
-  GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y");                                \
-  GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad);                                              \
-  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1);                           \
-  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1);                            \
-                                                                                                              \
-  GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y");                                \
-  GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad);                                              \
-  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride);             \
-  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride);              \
-                                                                                                              \
-  GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \
-                    LAYER##_##BLK##_conv4_propinput, LAYER##_##BLK##_conv4_propinput_label);                  \
-                                                                                                              \
-  auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad;                                              \
-  auto &LAYER##_##BLK##_grad_output_label = "y"
-
-// (Operator& input)
-#define MAKE_NORMAL_BLOCK_GRAD(LAYER, BLK, input_dy, dy_label)                                                \
-  GENERATE_RELU_GRAD(LAYER, BLK, relu5, input_dy, dy_label);                                                  \
-                                                                                                              \
-  GENERATE_BN_GRAD(LAYER, BLK, bn3, LAYER##_##BLK##_relu5_grad);                                              \
-  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1);                           \
-  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv3, LAYER##_##BLK##_bn3_grad, stride_1);                            \
-                                                                                                              \
-  GENERATE_RELU_GRAD(LAYER, BLK, relu2, LAYER##_##BLK##_conv3_propinput, "y");                                \
-  GENERATE_BN_GRAD(LAYER, BLK, bn2, LAYER##_##BLK##_relu2_grad);                                              \
-  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1);                           \
-  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv2, LAYER##_##BLK##_bn2_grad, stride_1);                            \
-                                                                                                              \
-  GENERATE_RELU_GRAD(LAYER, BLK, relu1, LAYER##_##BLK##_conv2_propinput, "y");                                \
-  GENERATE_BN_GRAD(LAYER, BLK, bn1, LAYER##_##BLK##_relu1_grad);                                              \
-  GENERATE_CONV_PROP_FILTER(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride);             \
-  GENERATE_CONV_PROP_INPUT(LAYER, BLK, conv1, LAYER##_##BLK##_bn1_grad, LAYER##_##BLK##_stride);              \
-                                                                                                              \
-  GENERATE_ADD_GRAD(LAYER, BLK, add5, LAYER##_##BLK##_conv1_propinput, LAYER##_##BLK##_conv1_propinput_label, \
-                    input_dy, dy_label);                                                                      \
-                                                                                                              \
-  auto &LAYER##_##BLK##_grad_output = LAYER##_##BLK##_add5_grad;                                              \
-  auto &LAYER##_##BLK##_grad_output_label = "y"
-
-// (Operator& input_dy)
-#define MAKE_RESIDUAL_LAYER_GRAD(LAYER, input_dy, dy_label)  \
-  MAKE_RESIDUAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \
-                                                             \
-  auto &LAYER##_grad_output = LAYER##_blk1_grad_output;      \
-  auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label;
-
-// (Operator& input_dy)
-#define MAKE_NORMAL_LAYER_GRAD(LAYER, input_dy, dy_label)  \
-  MAKE_NORMAL_BLOCK_GRAD(LAYER, blk1, input_dy, dy_label); \
-                                                           \
-  auto &LAYER##_grad_output = LAYER##_blk1_grad_output;    \
-  auto &LAYER##_grad_output_label = LAYER##_blk1_grad_output_label;
-
-#define MAKE_RESNET50_GRAD(input_dy, dy_label)                                      \
-  MAKE_NORMAL_LAYER_GRAD(layer16, input_dy, dy_label)                               \
-  MAKE_NORMAL_LAYER_GRAD(layer15, layer16_grad_output, layer16_grad_output_label)   \
-  MAKE_RESIDUAL_LAYER_GRAD(layer14, layer15_grad_output, layer15_grad_output_label) \
-  MAKE_NORMAL_LAYER_GRAD(layer13, layer14_grad_output, layer14_grad_output_label)   \
-  MAKE_NORMAL_LAYER_GRAD(layer12, layer13_grad_output, layer13_grad_output_label)   \
-  MAKE_NORMAL_LAYER_GRAD(layer11, layer12_grad_output, layer12_grad_output_label)   \
-  MAKE_NORMAL_LAYER_GRAD(layer10, layer11_grad_output, layer11_grad_output_label)   \
-  MAKE_NORMAL_LAYER_GRAD(layer9, layer10_grad_output, layer10_grad_output_label)    \
-  MAKE_RESIDUAL_LAYER_GRAD(layer8, layer9_grad_output, layer9_grad_output_label)    \
-  MAKE_NORMAL_LAYER_GRAD(layer7, layer8_grad_output, layer8_grad_output_label)      \
-  MAKE_NORMAL_LAYER_GRAD(layer6, layer7_grad_output, layer7_grad_output_label)      \
-  MAKE_NORMAL_LAYER_GRAD(layer5, layer6_grad_output, layer6_grad_output_label)      \
-  MAKE_RESIDUAL_LAYER_GRAD(layer4, layer5_grad_output, layer5_grad_output_label)    \
-  MAKE_NORMAL_LAYER_GRAD(layer3, layer4_grad_output, layer4_grad_output_label)      \
-  MAKE_NORMAL_LAYER_GRAD(layer2, layer3_grad_output, layer3_grad_output_label)      \
-  MAKE_RESIDUAL_LAYER_GRAD(layer1, layer2_grad_output, layer2_grad_output_label)    \
-                                                                                    \
-  auto &resnet50_grad_output = layer1_grad_output;                                  \
-  auto &resnet50_grad_output_label = layer1_grad_output_label;
-
-bool resnet50(Graph &graph) {
-  auto data = op::Data().set_attr_index(0);
-  auto data1 = op::Data().set_attr_index(1);
-  TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT);
-  data.update_output_desc_y(shape_desc);
-
-  TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT);
-
-  auto var = op::Variable("conv2d_var");
-  var.update_output_desc_y(desc);
-  var.update_input_desc_x(desc);
-
-  auto varw1 = op::Variable("conv2d_varw1");
-  varw1.update_output_desc_y(desc);
-
-  auto conv2d = op::Conv2D("Translate")
-                    .set_input_x(data)
-                    .set_input_filter(var)
-                    .set_attr_strides({1, 1, 2, 2})
-                    .set_attr_pads({2, 3, 2, 3})
-                    .set_attr_data_format("NCHW");
-  TensorDesc desc_y;
-  desc_y.SetFormat(FORMAT_NCHW); // shape: 32 64 112 112
-  conv2d.update_output_desc_y(desc_y);
-
-  TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT);
-  auto var1 = op::Variable("bn_var1");
-  var1.update_output_desc_y(desc1);
-
-  auto var2 = op::Variable("bn_var2");
-  var2.update_output_desc_y(desc1);
-
-  auto var3 = op::Variable("bn_var3");
-  var3.update_output_desc_y(desc1);
-
-  auto var4 = op::Variable("bn_var4");
-  var4.update_output_desc_y(desc1);
-
-  TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT);
-
-  auto var5 = op::Variable("var5");
-  var5.update_output_desc_y(desc2);
-
-  auto var6 = op::Variable("var6");
-  var6.update_output_desc_y(desc2);
-
-  TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
-
-  auto label1 = op::Variable("label1");
-  label1.update_output_desc_y(desclabel);
-
-  TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
-  auto matvar = op::Variable("matvar");
-  matvar.update_output_desc_y(descmatlabel);
-
-  auto matvar1 = op::Variable("matvar1");
-  matvar1.update_output_desc_y(descmatlabel);
-
-  auto bn = op::FusedBatchNorm()
-                .set_input_x(conv2d, "y")
-                .set_input_scale(var1)
-                .set_input_b(var2)
-                .set_input_mean(var3)
-                .set_input_variance(var4)
-                .set_attr_mode(1)
-                .set_attr_epsilon(1e-5)
-                .set_attr_is_training(true)
-                .set_attr_is_training_fusion(true)
-                .set_attr_moving_average_fraction(994352128);
-
-  auto relu = op::Relu().set_input_x(bn, "y");
-
-  auto maxpool = op::MaxPoolWithArgmax()
-                     .set_input_x(relu, "y")
-                     .set_attr_ksize({1, 3, 3, 1})
-                     .set_attr_padding("SAME")
-                     .set_attr_strides({1, 2, 2, 1});
-
-  MAKE_RESNET50(maxpool);
-  std::vector<Operator> inputs{data};  //,var,var1,layer1_blk1_bn1_b,var3,var4};
-  std::vector<Operator> outputs{};
-
-  graph.SetInputs(inputs).SetOutputs(outputs);
-  return true;
-}
-
-#define GENERATE_CONSTANT_USE_DESC(OPNUM, desc, val)                                 \
-  uint32_t OPNUM##_size = desc.GetShape().GetShapeSize();                            \
-  Tensor OPNUM##_tensor;                                                             \
-  OPNUM##_tensor.SetTensorDesc(desc);                                                \
-  if (desc.GetDataType() == DT_FLOAT) {                                              \
-    float *OPNUM##_data = new float[OPNUM##_size];                                   \
-    for (int i = 0; i < (int)OPNUM##_size; i++) {                                    \
-      *(OPNUM##_data + i) = val;                                                     \
-    }                                                                                \
-    OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(float));   \
-    delete[] OPNUM##_data;                                                           \
-  }                                                                                  \
-  if (desc.GetDataType() == DT_INT64) {                                              \
-    int64_t *OPNUM##_data = new int64_t[OPNUM##_size];                               \
-    for (int i = 0; i < (int)OPNUM##_size; i++) {                                    \
-      *(OPNUM##_data + i) = val;                                                     \
-    }                                                                                \
-    OPNUM##_tensor.SetData((uint8_t *)OPNUM##_data, OPNUM##_size * sizeof(int64_t)); \
-    delete[] OPNUM##_data;                                                           \
-  }                                                                                  \
-  auto OPNUM##_constant = op::Constant().set_attr_value(OPNUM##_tensor);             \
-  OPNUM##_constant.update_output_desc_y(desc);
-
-#define GENERATE_VAR_LAYER(OPNUM, desc, input)                                                        \
-  auto OPNUM##_weight = op::Variable(string(#OPNUM));                                                 \
-  OPNUM##_weight.update_output_desc_y(desc);                                                          \
-  auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \
-                                                                                                      \
-  input.push_back(OPNUM##_weight);
-
-#define GENERATE_VAR_LAYER_1(OPNUM, desc, var_format, input, name)                                    \
-  auto OPNUM##_weight = op::Variable(string(name));                                                   \
-  OPNUM##_weight.update_output_desc_y(desc);                                                          \
-  auto OPNUM##_assign = op::Assign().set_input_ref(OPNUM##_weight).set_input_value(OPNUM##_constant); \
-                                                                                                      \
-  input.push_back(OPNUM##_weight);
-
-int BuildInitVarGraph(Graph &graph) {
-  std::vector<Operator> inputs{};
-  std::vector<Operator> outputs{};
-
-  TensorDesc desc(ge::Shape({64, 3, 7, 7}), FORMAT_NCHW, DT_FLOAT);
-  GENERATE_CONSTANT_USE_DESC(conv2d_var, desc, 0.01);
-  GENERATE_VAR_LAYER(conv2d_var, desc, inputs);
-
-  GENERATE_CONSTANT_USE_DESC(conv2d_varw1, desc, 0.01);
-  GENERATE_VAR_LAYER(conv2d_varw1, desc, inputs);
-
-  TensorDesc desc1(ge::Shape({1, 64, 1, 1}), FORMAT_NCHW, DT_FLOAT);
-  GENERATE_CONSTANT_USE_DESC(bn_var1, desc1, 0.01);
-  GENERATE_VAR_LAYER(bn_var1, desc1, inputs);
-  GENERATE_CONSTANT_USE_DESC(bn_var2, desc1, 0.01);
-  GENERATE_VAR_LAYER(bn_var2, desc1, inputs);
-  GENERATE_CONSTANT_USE_DESC(bn_var3, desc1, 0.01);
-  GENERATE_VAR_LAYER(bn_var3, desc1, inputs);
-  GENERATE_CONSTANT_USE_DESC(bn_var4, desc1, 0.01);
-  GENERATE_VAR_LAYER(bn_var4, desc1, inputs);
-
-  TensorDesc desc2(ge::Shape({2048, 1001}), FORMAT_NCHW, DT_FLOAT);
-  GENERATE_CONSTANT_USE_DESC(var5, desc2, 0.01);
-  GENERATE_VAR_LAYER(var5, desc2, inputs);
-  GENERATE_CONSTANT_USE_DESC(var6, desc2, 0.01);
-  GENERATE_VAR_LAYER(var6, desc2, inputs);
-
-  TensorDesc desclabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
-  GENERATE_CONSTANT_USE_DESC(label1, desclabel, 0.1);
-  GENERATE_VAR_LAYER(label1, desclabel, inputs);
-
-  TensorDesc descmatlabel(ge::Shape({1, 1001, 1, 1}), FORMAT_NCHW, DT_FLOAT);
-  GENERATE_CONSTANT_USE_DESC(matvar, descmatlabel, 0.01);
-  GENERATE_VAR_LAYER(matvar, descmatlabel, inputs);
-  GENERATE_CONSTANT_USE_DESC(matvar1, descmatlabel, 0.01);
-  GENERATE_VAR_LAYER(matvar1, descmatlabel, inputs);
-
-  MAKE_RESNET50_VAR(inputs);
-
-  TensorDesc ctrl(ge::Shape({1, 1, 1, 1}), FORMAT_NCHW, DT_INT64);
-
-  GENERATE_CONSTANT_USE_DESC(iterations_per_loop, ctrl, 100);
-  GENERATE_VAR_LAYER_1(iterations_per_loop, ctrl, "4D", inputs, "npu_runconfig/iterations_per_loop");
-  GENERATE_CONSTANT_USE_DESC(loop_cond, ctrl, 0);
-  GENERATE_VAR_LAYER_1(loop_cond, ctrl, "4D", inputs, "npu_runconfig/loop_cond");
-  GENERATE_CONSTANT_USE_DESC(one, ctrl, 1);
-  GENERATE_VAR_LAYER_1(one, ctrl, "4D", inputs, "npu_runconfig/one");
-  GENERATE_CONSTANT_USE_DESC(zero, ctrl, 0);
-  GENERATE_VAR_LAYER_1(zero, ctrl, "4D", inputs, "npu_runconfig/zero");
-
-  graph.SetInputs(inputs).SetOutputs(outputs);
-  return 0;
-}
-int TestBuildGraphTest(Func fun, Graph &graph, vector<ge::Tensor> &inputs, vector<ge::Tensor> &outputs) {
-  bool graph_ret = fun(graph);
-  ge::Tensor shapeTensor;
-  TensorDesc shape_desc(ge::Shape({32, 3, 224, 224}), FORMAT_NCHW, DT_FLOAT);
-  uint32_t sizeshape = shape_desc.GetShape().GetShapeSize();
-  printf("[test] desc size filter shape:%u\n", sizeshape);
-  shapeTensor.SetTensorDesc(shape_desc);
-  vector<float> dataValuec;
-  for (int i = 0; i < sizeshape; i++) {
-    dataValuec.push_back(1);
-  }
-
-  shapeTensor.SetData((uint8_t *)dataValuec.data(), 4 * sizeshape);
-  inputs.push_back(shapeTensor);
-
-  ge::Tensor shapeTensor1;
-  TensorDesc shape_desc1(ge::Shape({1, 32, 1, 1}), FORMAT_NCHW, DT_FLOAT);
-  uint32_t sizeshape1 = shape_desc1.GetShape().GetShapeSize();
-  printf("[test] desc size filter shape:%u\n", sizeshape1);
-  shapeTensor1.SetTensorDesc(shape_desc1);
-  vector<int32_t> dataValuec1;
-  for (int i = 0; i < sizeshape1; i++) {
-    dataValuec1.push_back(1);
-  }
-
-  shapeTensor1.SetData((uint8_t *)dataValuec1.data(), 4 * sizeshape1);
-
-  return 0;
-}
-int runTrainGraph(Func fun, int loopCount) {
-  printf("GE BBIT begin...\n");
-  std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
-
-  std::map<std::string, std::string> ge_options = {
-      {"device_id", "0"}, {"rank_table_file", ""}, {"graphType", "1"}, {"ge.graphRunMode", "2"}};
-
-  std::map<std::string, std::string> session_options = {{"a", "b"}, {TRAIN_FLAG, "1"}};
-
-  ge::Status ret;
-
-  // init ge
-  ret = GEInitialize_api_new("train", "fe,plugin");
-  printf("ge::GEInitialize ret:%d\n", ret);
-
-  // init session
-  ge::Session session(session_options);
-
-  int graphId_initvar = 1;
-  ge::Graph graph_initvar("initVarGraph");
-  bool graph_ret = BuildInitVarGraph(graph_initvar);
-
-  // session addgraph
-  int graphId = 0;
-
-  // build graph
-  ge::Graph graph("bigGraph");
-  std::vector<ge::Tensor> inputs;
-  ge::Tensor outputTensor;
-  std::vector<ge::Tensor> outputs;
-  graph_ret = TestBuildGraphTest(fun, graph, inputs, outputs);
-  printf("TestReluGrad ret:%d\n", graph_ret);
-
-  ret = session.AddGraph(graphId_initvar, graph_initvar);
-  printf("session.AddVarGraph ret:%d\n", ret);
-  if (ret) return ret;
-
-  ret = session.AddGraph(graphId, graph);
-  printf("session.AddGraph ret:%d\n", ret);
-  if (ret) return ret;
-
-  std::vector<ge::Tensor> inputs1;
-  std::vector<ge::Tensor> outputs1;
-  ret = session.RunGraph(graphId_initvar, inputs1, outputs1);
-
-  if (ret != SUCCESS) {
-    return ret;
-  }
-  // add loop for test of stabilty:
-  for (int i = 0; i < loopCount; i++) {
-    // session rungraph
-    printf("loopCount:%d\n", loopCount);
-    ret = session.RunGraph(graphId, inputs, outputs);
-    printf("session.RunGraph ret:%d\n", ret);
-    if (ret) return ret;
-
-    // define 99999 as loop forever
-    if (loopCount == 99999) i = 0;
-  }
-  std::chrono::system_clock::time_point end = std::chrono::system_clock::now();
-  auto millisecondsduration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
-  auto ms = millisecondsduration.count();
-  std::stringstream ss;
-  ss << ms << "ms";
-  std::string run_time = ss.str();
-  printf("run time is : %s \n", run_time.c_str());
-
-  return 0;
-}
-
-int main(int argc, char *argv[]) {
-  // add loop for test of stabilty:
-  int loopCount = 1;
-  if (argc >= 2) loopCount = atoi(argv[1]);
-
-  Status ret = SUCCESS;
-  ret = runTrainGraph(resnet50, loopCount);
-  if (ret == SUCCESS) {
-    std::cout << "[train resnet50 success]" << std::endl;
-  } else {
-    std::cout << "!!! train resnet50 fail !!!" << std::endl;
-  }
-  return ret;
-}
diff --git a/tests/st/test_ge_st.py b/tests/st/test_ge_st.py
deleted file mode 100644
index b5479cfc..00000000
--- a/tests/st/test_ge_st.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright 2019-2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-"""
-ge st test.
-"""
-import pytest
-import subprocess
-import os
-
-@pytest.mark.level0
-@pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
-@pytest.mark.env_card
-@pytest.mark.component_ge
-def test_resnet50_train():
-    ge_st_dir=os.environ.get('GE_ST_DIR',
-            '/home/jenkins/workspace/release_pkg/gate/graphengine_lib')
-    ge_lib_dir=os.environ.get('GRAPHENGINE_LIB', '/home/jenkins/workspace/release_pkg/gate/graphengine_lib')
-
-    real_pythonpath=os.environ.get('REAL_PYTHONPATH')
-    pythonpath=os.environ.get('PYTHONPATH')
-    if real_pythonpath:
-        if pythonpath:
-            os.environ['PYTHONPATH']=real_pythonpath+':'+pythonpath
-        else:
-            os.environ['PYTHONPATH']=real_pythonpath
-    print('PYTHONPATH: '+os.environ.get('PYTHONPATH'))
-
-    os.environ['ASCEND_OPP_PATH']='/usr/local/Ascend/opp'
-    os.environ['ASCEND_ENGINE_PATH']='/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libaicpu_engine.so:' \
-                                     '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/libfe.so:' \
-                                     '/usr/local/Ascend/fwkacllib/lib64/plugin/opskernel/librts_engine.so:'+ \
-                                     ge_lib_dir + '/libge_local_engine.so'
-    print('ASCEND_OPP_PATH: '+os.environ.get('ASCEND_OPP_PATH'))
-    print('ASCEND_ENGINE_PATH: '+os.environ.get('ASCEND_ENGINE_PATH'))
-    print('LD_LIBRARY_PATH: '+os.environ.get('LD_LIBRARY_PATH'))
-
-    cmd=ge_st_dir + '/st_resnet50_train'
-    print('cmd: '+cmd)
-    os.environ['SLOG_PRINT_TO_STDOUT']="1"
-    ret=subprocess.call([cmd], shell=True)
-    assert ret==0
-

From e9e5dd7b9dcaf898a5d4e942a50ac0f2deb05bf6 Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Mon, 7 Dec 2020 17:14:14 +0800
Subject: [PATCH 5/9] fix geruntime missing files and error codes

---
 ge/ge_runtime/CMakeLists.txt   | 3 +++
 ge/ge_runtime/runtime_model.cc | 4 ++--
 ge/ge_runtime/task/task.h      | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/ge/ge_runtime/CMakeLists.txt b/ge/ge_runtime/CMakeLists.txt
index 42d3b344..ce1b89ea 100644
--- a/ge/ge_runtime/CMakeLists.txt
+++ b/ge/ge_runtime/CMakeLists.txt
@@ -13,6 +13,9 @@ set(GE_SRC_LIST
     "task/hccl_task.cc"
     "task/memcpy_async_task.cc"
     "task/profiler_task.cc"
+    "task/label_goto_task.cc"
+    "task/label_set_task.cc"
+    "task/label_switch_task.cc"
 )
 
 add_library(ge_runtime SHARED ${GE_SRC_LIST})
diff --git a/ge/ge_runtime/runtime_model.cc b/ge/ge_runtime/runtime_model.cc
index 0ff56ef1..fb0f3e85 100644
--- a/ge/ge_runtime/runtime_model.cc
+++ b/ge/ge_runtime/runtime_model.cc
@@ -307,8 +307,8 @@ bool RuntimeModel::Run() {
 
   ret = rtStreamSynchronize(rt_model_stream_);
   if (ret != RT_ERROR_NONE) {
-    if (ret == RT_ERROR_END_OF_SEQUENCE) {
-      GELOGI("Model stream RT_ERROR_END_OF_SEQUENCE signal received, ret = 0x%X", ret);
+    if (ret == ACL_ERROR_RT_END_OF_SEQUENCE) {
+      GELOGI("Model stream ACL_ERROR_RT_END_OF_SEQUENCE signal received, ret = 0x%X", ret);
       return true;
     }
     GELOGE(RT_FAILED, "Model stream sync failed, ret = 0x%X", ret);
diff --git a/ge/ge_runtime/task/task.h b/ge/ge_runtime/task/task.h
index 6c4df248..c255fd22 100644
--- a/ge/ge_runtime/task/task.h
+++ b/ge/ge_runtime/task/task.h
@@ -24,6 +24,7 @@
 #include "runtime/rt_model.h"
 #include "ge_runtime/model_context.h"
 #include "ge_runtime/task_info.h"
+#include "external/runtime/rt_error_codes.h"
 
 namespace ge {
 namespace model_runner {

From b8e82bb16e1b72d5aee31a6f7354d4efa0b4f4e3 Mon Sep 17 00:00:00 2001
From: lichun <lichun30@hisilicon.com>
Date: Mon, 7 Dec 2020 19:57:03 +0800
Subject: [PATCH 6/9] inference supports dynamic shape

---
 ge/common/ge/op_tiling_manager.cc             |   4 +
 ge/common/ge/op_tiling_manager.h              |   1 +
 ge/executor/CMakeLists.txt                    |  94 ++++++++-
 ge/executor/ge_executor.cc                    |  73 ++++++-
 ge/executor/module.mk                         |  84 +++++++-
 ge/ge_local_engine/CMakeLists.txt             |   2 +-
 ge/ge_local_engine/engine/host_cpu_engine.cc  |  10 +-
 ge/ge_local_engine/engine/host_cpu_engine.h   |   2 +-
 ge/graph/build/graph_builder.cc               |  51 +++++
 ge/graph/load/graph_loader.cc                 |   7 +-
 ge/graph/load/graph_loader.h                  |   3 +-
 .../load/new_model_manager/davinci_model.cc   |  88 ++++----
 .../load/new_model_manager/davinci_model.h    |   7 +-
 .../load/new_model_manager/model_manager.cc   |  48 ++++-
 .../load/new_model_manager/model_manager.h    |   5 +-
 ge/graph/partition/dynamic_shape_partition.cc |  47 ++++-
 ge/graph/partition/dynamic_shape_partition.h  |   1 +
 ge/graph/passes/pass_utils.cc                 |   4 -
 .../passes/transop_breadth_fusion_pass.cc     |   2 +-
 ge/host_cpu_engine/CMakeLists.txt             |   6 +-
 ge/host_kernels/floordiv_kernel.cc            |   4 +-
 ge/host_kernels/floordiv_kernel.h             |   4 -
 ge/host_kernels/ssd_prior_box_kernel.cc       |   6 +-
 ge/hybrid/executor/hybrid_execution_context.h |   2 +-
 .../executor/hybrid_model_async_executor.cc   |  38 ++++
 .../executor/hybrid_model_async_executor.h    |   5 +
 ge/hybrid/executor/hybrid_profiler.h          |   2 +-
 ge/hybrid/executor/node_state.h               |   2 +-
 ge/hybrid/hybrid_davinci_model.cc             |  79 ++++++++
 ge/hybrid/hybrid_davinci_model.h              |  21 ++
 ge/hybrid/hybrid_davinci_model_stub.cc        |  32 +++
 ge/hybrid/model/hybrid_model.cc               | 188 +++++++++++++++++-
 ge/hybrid/model/hybrid_model.h                |  26 +++
 ge/hybrid/model/hybrid_model_builder.cc       |  31 ++-
 .../node_executor/aicore/aicore_op_task.cc    |  56 ++++++
 .../node_executor/aicore/aicore_op_task.h     |   1 +
 .../aicore/aicore_task_compiler.h             |   2 +-
 .../node_executor/aicpu/aicpu_node_executor.h |   2 +
 .../controlop/control_op_executor.h           |   1 +
 .../ge_local/ge_local_node_executor.cc        |   2 +-
 .../host_cpu/kernel/assign_kernel.cc          |   1 -
 ge/hybrid/node_executor/node_executor.cc      |   1 -
 .../partitioned_call_node_executor.h          |   1 -
 ge/hybrid/node_executor/task_context.h        |   2 +-
 inc/framework/executor/ge_executor.h          |  16 ++
 45 files changed, 976 insertions(+), 88 deletions(-)

diff --git a/ge/common/ge/op_tiling_manager.cc b/ge/common/ge/op_tiling_manager.cc
index 9b5ba2d7..db959368 100644
--- a/ge/common/ge/op_tiling_manager.cc
+++ b/ge/common/ge/op_tiling_manager.cc
@@ -88,4 +88,8 @@ void OpTilingManager::LoadSo() {
   }
 }
 
+OpTilingManager &OpTilingManager::GetInstance() {
+  static OpTilingManager instance;
+  return instance;
+}
 }  // namespace ge
diff --git a/ge/common/ge/op_tiling_manager.h b/ge/common/ge/op_tiling_manager.h
index d4e7f34e..17761969 100644
--- a/ge/common/ge/op_tiling_manager.h
+++ b/ge/common/ge/op_tiling_manager.h
@@ -25,6 +25,7 @@ using SoToHandleMap = std::map<std::string, void *>;
 class OpTilingManager {
  public:
   OpTilingManager() = default;
+  static OpTilingManager &GetInstance();
   ~OpTilingManager();
   void LoadSo();
 
diff --git a/ge/executor/CMakeLists.txt b/ge/executor/CMakeLists.txt
index de8025f3..d7dfdc84 100644
--- a/ge/executor/CMakeLists.txt
+++ b/ge/executor/CMakeLists.txt
@@ -72,7 +72,89 @@ set(SRC_LIST
     "../single_op/task/tbe_task_builder.cc"
     "../single_op/task/aicpu_task_builder.cc"
     "../single_op/task/aicpu_kernel_task_builder.cc"
-    "../hybrid/hybrid_davinci_model_stub.cc"
+    "../hybrid/common/tensor_value.cc"
+    "../hybrid/common/npu_memory_allocator.cc"
+    "../hybrid/executor/rt_callback_manager.cc"
+    "../hybrid/executor/node_state.cc"
+    "../hybrid/executor/node_done_manager.cc"
+    "../hybrid/executor/hybrid_profiler.cc"
+    "../hybrid/executor/hybrid_model_executor.cc"
+    "../hybrid/executor/hybrid_model_async_executor.cc"
+    "../hybrid/executor/hybrid_execution_context.cc"
+    "../hybrid/executor/subgraph_context.cc"
+    "../hybrid/executor/subgraph_executor.cc"
+    "../hybrid/executor/worker/task_compile_engine.cc"
+    "../hybrid/executor/worker/shape_inference_engine.cc"
+    "../hybrid/executor/worker/execution_engine.cc"
+    "../hybrid/model/hybrid_model.cc"
+    "../hybrid/model/hybrid_model_builder.cc"
+    "../hybrid/model/node_item.cc"
+    "../hybrid/model/graph_item.cc"
+    "../hybrid/node_executor/aicore/aicore_node_executor.cc"
+    "../hybrid/node_executor/aicore/aicore_op_task.cc"
+    "../hybrid/node_executor/aicore/aicore_task_builder.cc"
+    "../hybrid/node_executor/aicpu/aicpu_node_executor.cc"
+    "../hybrid/node_executor/compiledsubgraph/known_node_executor.cc"
+    "../hybrid/node_executor/ge_local/ge_local_node_executor.cc"
+    "../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc"
+    "../hybrid/node_executor/host_cpu/kernel_factory.cc"
+    "../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc"
+    "../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc"
+    "../hybrid/node_executor/controlop/control_op_executor.cc"
+    "../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc"
+    "../hybrid/node_executor/rts/rts_node_executor.cc"
+    "../hybrid/node_executor/node_executor.cc"
+    "../hybrid/node_executor/task_context.cc"
+    "../hybrid/hybrid_davinci_model.cc"
+    "../ge_local_engine/engine/host_cpu_engine.cc"
+    "../graph/common/omg_util.cc"
+    "../graph/manager/host_mem_manager.cc"
+    "../graph/build/memory/var_mem_assign_util.cc"
+    "../host_kernels/transpose_kernel.cc"
+    "../host_kernels/add_kernel.cc"
+    "../host_kernels/broadcast_args_kernel.cc"
+    "../host_kernels/broadcast_gradient_args_kernel.cc"
+    "../host_kernels/cast_kernel.cc"
+    "../host_kernels/concat_offset_kernel.cc"
+    "../host_kernels/concat_v2_kernel.cc"
+    "../host_kernels/dynamic_stitch_kernel.cc"
+    "../host_kernels/identity_kernel.cc"
+    "../host_kernels/empty_kernel.cc"
+    "../host_kernels/expanddims_kernel.cc"
+    "../host_kernels/fill_kernel.cc"
+    "../host_kernels/floordiv_kernel.cc"
+    "../host_kernels/floormod_kernel.cc"
+    "../host_kernels/gather_v2_kernel.cc"
+    "../host_kernels/greater_kernel.cc"
+    "../host_kernels/kernel_utils.cc"
+    "../host_kernels/maximum_kernel.cc"
+    "../host_kernels/mul_kernel.cc"
+    "../host_kernels/pack_kernel.cc"
+    "../host_kernels/permute_kernel.cc"
+    "../host_kernels/range_kernel.cc"
+    "../host_kernels/rank_kernel.cc"
+    "../host_kernels/reduce_prod_kernel.cc"
+    "../host_kernels/reshape_kernel.cc"
+    "../host_kernels/rsqrt_kernel.cc"
+    "../host_kernels/shape_kernel.cc"
+    "../host_kernels/shape_n_kernel.cc"
+    "../host_kernels/size_kernel.cc"
+    "../host_kernels/slice_d_kernel.cc"
+    "../host_kernels/slice_kernel.cc"
+    "../host_kernels/squeeze_kernel.cc"
+    "../host_kernels/unsqueeze_kernel.cc"
+    "../host_kernels/ssd_prior_box_kernel.cc"
+    "../host_kernels/strided_slice_kernel.cc"
+    "../host_kernels/sub_kernel.cc"
+    "../host_kernels/transdata_kernel.cc"
+    "../host_kernels/unpack_kernel.cc"
+    "../graph/passes/pass_utils.cc"
+    "../graph/common/bcast.cc"
+    "../common/fp16_t.cc"
+    "../common/formats/format_transfers/format_transfer_transpose.cc"
+    "../common/formats/utils/formats_trans_utils.cc"
 )
 
 ######## libge_executor.a ########
@@ -105,9 +187,9 @@ target_include_directories(ge_executor PRIVATE
     ${CMAKE_BINARY_DIR}/proto/ge
     #### yellow zone ####
     ${GE_CODE_DIR}/../inc
-    ${GE_CODE_DIR}/../inc/cce   
+    ${GE_CODE_DIR}/../inc/cce
     #### blue zone ####
-    ${GE_CODE_DIR}/third_party/fwkacllib/inc 
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
 target_link_libraries(ge_executor PRIVATE
@@ -147,9 +229,9 @@ target_include_directories(ge_executor_shared PRIVATE
     ${CMAKE_BINARY_DIR}/proto/ge
     #### yellow zone ####
     ${GE_CODE_DIR}/../inc
-    ${GE_CODE_DIR}/../inc/cce   
+    ${GE_CODE_DIR}/../inc/cce
     #### blue zone ####
-    ${GE_CODE_DIR}/third_party/fwkacllib/inc 
+    ${GE_CODE_DIR}/third_party/fwkacllib/inc
 )
 
 target_link_libraries(ge_executor_shared PRIVATE
@@ -158,7 +240,7 @@ target_link_libraries(ge_executor_shared PRIVATE
     -Wl,--no-as-needed
     ge_common
     runtime
-    slog 
+    slog
     mmpa
     graph
     register
diff --git a/ge/executor/ge_executor.cc b/ge/executor/ge_executor.cc
index add95372..3e916916 100755
--- a/ge/executor/ge_executor.cc
+++ b/ge/executor/ge_executor.cc
@@ -39,6 +39,8 @@
 #include "graph/manager/graph_var_manager.h"
 #include "graph/load/new_model_manager/davinci_model.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
+#include "graph/opsproto_manager.h"
+#include "ge_local_engine/engine/host_cpu_engine.h"
 
 using std::string;
 using std::vector;
@@ -221,6 +223,33 @@ class ModelListenerAdapter : public ModelListener {
   std::shared_ptr<ge::ModelListener> listener;
 };
 
+static void InitOpsProtoManger() {
+  string opsproto_path;
+  const char *path_env = std::getenv("ASCEND_OPP_PATH");
+  if (path_env != nullptr) {
+    string path = path_env;
+    string file_path = RealPath(path.c_str());
+    if (file_path.empty()) {
+      GELOGE(FAILED, "File path %s is invalid.", path.c_str());
+      return;
+    }
+    opsproto_path = (path + "/op_proto/custom/" + ":") + (path + "/op_proto/built-in/");
+    GELOGI("Get opsproto so path from env : %s", path.c_str());
+  } else {
+    string path_base = PluginManager::GetPath();
+    GELOGI("path_base is %s", path_base.c_str());
+    path_base = path_base.substr(0, path_base.rfind('/'));
+    path_base = path_base.substr(0, path_base.rfind('/') + 1);
+    opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
+  }
+
+  GELOGI("Get opsproto path is %s", opsproto_path.c_str());
+  OpsProtoManager *manager = OpsProtoManager::Instance();
+  map<string, string> option_tmp;
+  option_tmp.emplace(std::pair<string, string>(string("ge.opsProtoLibPath"), opsproto_path));
+  (void)manager->Initialize(option_tmp);
+}
+
 GeExecutor::GeExecutor() {}
 
 Status GeExecutor::Initialize() {
@@ -230,6 +259,16 @@ Status GeExecutor::Initialize() {
     return ge::SUCCESS;
   }
 
+  OpTilingManager::GetInstance().LoadSo();
+
+  Status initHostCpuEngineStatus = HostCpuEngine::GetInstance().Initialize();
+  if (initHostCpuEngineStatus != SUCCESS) {
+    GELOGE(initHostCpuEngineStatus, "Failed to initialize HostCpuEngine");
+    return initHostCpuEngineStatus;
+  }
+
+  InitOpsProtoManger();
+
   std::vector<rtMemType_t> mem_type(1, RT_MEMORY_HBM);
   mem_type.push_back(RT_MEMORY_P2P_DDR);
   auto ret = MemManager::Instance().Initialize(mem_type);
@@ -600,10 +639,16 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
     return ACL_ERROR_GE_INTERNAL_ERROR;
   }
 
-  std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
-  if (davinci_model != nullptr) {
-    uint64_t session_id = davinci_model->GetSessionId();
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = ModelManager::GetInstance()->GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    uint64_t session_id = hybrid_davinci_model->GetSessionId();
     VarManagerPool::Instance().RemoveVarManager(session_id);
+  } else {
+    std::shared_ptr<DavinciModel> davinci_model = ModelManager::GetInstance()->GetModel(model_id);
+    if (davinci_model != nullptr) {
+      uint64_t session_id = davinci_model->GetSessionId();
+      VarManagerPool::Instance().RemoveVarManager(session_id);
+    }
   }
   ret = GraphLoader::UnloadModel(model_id);
   if (ret != SUCCESS) {
@@ -933,6 +978,26 @@ Status GeExecutor::LoadModelWithQ(uint32_t &model_id, const ModelData &model_dat
 */
 Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
                              ge::RunModelData &run_output_data, bool async_mode) {
+  std::vector<GeTensorDesc> input_desc = {};
+  std::vector<GeTensorDesc> output_desc = {};
+  return ExecModel(model_id, stream, run_input_data, input_desc, run_output_data, output_desc, async_mode);
+}
+
+/**
+* @ingroup ge
+* @brief Synchronous execution of offline model(Do not create thread)
+* @param [in] uint32_t model_id: Model ID to execute
+              void *stream: stream to execute the model on
+              const ge::RunModelData &run_input_data: Model input data
+              const std::vector<GeTensorDesc> &input_desc: Description of model input data
+              bool async_mode: whether to execute in asynchronous mode
+* @param [out] ge::RunModelData &run_output_data: Model output data
+* @param [out] std::vector<GeTensorDesc> &output_desc: Description of model output data
+* @return SUCCESS handle successfully / others handle failed
+*/
+Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
+                             const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
+                             std::vector<GeTensorDesc> &output_desc, bool async_mode) {
   if (!isInit_) {
     GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
     return ACL_ERROR_GE_EXEC_NOT_INIT;
@@ -957,7 +1022,7 @@ Status GeExecutor::ExecModel(uint32_t model_id, void *stream, const ge::RunModel
     }
   }
 
-  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, output_data);
+  return GraphLoader::ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
 }
 
 /**
diff --git a/ge/executor/module.mk b/ge/executor/module.mk
index 4a0188be..9566ca64 100644
--- a/ge/executor/module.mk
+++ b/ge/executor/module.mk
@@ -61,9 +61,91 @@ local_ge_executor_src_files :=  \
     ../single_op/task/tbe_task_builder.cc \
     ../single_op/task/aicpu_task_builder.cc \
     ../single_op/task/aicpu_kernel_task_builder.cc \
-    ../hybrid/hybrid_davinci_model_stub.cc\
     ../hybrid/node_executor/aicpu/aicpu_ext_info.cc \
     ../graph/common/local_context.cc \
+    ../hybrid/common/tensor_value.cc                                        \
+    ../hybrid/common/npu_memory_allocator.cc                                \
+    ../hybrid/executor/rt_callback_manager.cc                               \
+    ../hybrid/executor/node_state.cc                                        \
+    ../hybrid/executor/node_done_manager.cc                                 \
+    ../hybrid/executor/hybrid_profiler.cc                                   \
+    ../hybrid/executor/hybrid_model_executor.cc                             \
+    ../hybrid/executor/hybrid_model_async_executor.cc                       \
+    ../hybrid/executor/hybrid_execution_context.cc                          \
+    ../hybrid/executor/subgraph_context.cc                                  \
+    ../hybrid/executor/subgraph_executor.cc                                 \
+    ../hybrid/executor/worker/task_compile_engine.cc                        \
+    ../hybrid/executor/worker/shape_inference_engine.cc                     \
+    ../hybrid/executor/worker/execution_engine.cc                           \
+    ../hybrid/model/hybrid_model.cc                                         \
+    ../hybrid/model/hybrid_model_builder.cc                                 \
+    ../hybrid/model/node_item.cc                                            \
+    ../hybrid/model/graph_item.cc                                           \
+    ../hybrid/node_executor/aicore/aicore_node_executor.cc                  \
+    ../hybrid/node_executor/aicore/aicore_op_task.cc                        \
+    ../hybrid/node_executor/aicore/aicore_task_builder.cc                   \
+    ../hybrid/node_executor/aicpu/aicpu_node_executor.cc                    \
+    ../hybrid/node_executor/compiledsubgraph/known_node_executor.cc         \
+    ../hybrid/node_executor/ge_local/ge_local_node_executor.cc              \
+    ../hybrid/node_executor/host_cpu/host_cpu_node_executor.cc              \
+    ../hybrid/node_executor/host_cpu/kernel_factory.cc                      \
+    ../hybrid/node_executor/host_cpu/kernel/no_op_kernel.cc                 \
+    ../hybrid/node_executor/host_cpu/kernel/variable_kernel.cc              \
+    ../hybrid/node_executor/host_cpu/kernel/assign_kernel.cc                \
+    ../hybrid/node_executor/host_cpu/kernel/random_uniform_kernel.cc        \
+    ../hybrid/node_executor/controlop/control_op_executor.cc                \
+    ../hybrid/node_executor/partitioned_call/partitioned_call_node_executor.cc \
+    ../hybrid/node_executor/rts/rts_node_executor.cc                        \
+    ../hybrid/node_executor/node_executor.cc                                \
+    ../hybrid/node_executor/task_context.cc                                 \
+    ../hybrid/hybrid_davinci_model.cc                                       \
+    ../ge_local_engine/engine/host_cpu_engine.cc \
+    ../graph/common/omg_util.cc \
+    ../graph/manager/host_mem_manager.cc \
+    ../graph/build/memory/var_mem_assign_util.cc \
+    ../host_kernels/transpose_kernel.cc \
+    ../host_kernels/add_kernel.cc \
+    ../host_kernels/broadcast_args_kernel.cc \
+    ../host_kernels/broadcast_gradient_args_kernel.cc \
+    ../host_kernels/cast_kernel.cc \
+    ../host_kernels/concat_offset_kernel.cc \
+    ../host_kernels/concat_v2_kernel.cc \
+    ../host_kernels/dynamic_stitch_kernel.cc \
+    ../host_kernels/identity_kernel.cc \
+    ../host_kernels/empty_kernel.cc \
+    ../host_kernels/expanddims_kernel.cc \
+    ../host_kernels/fill_kernel.cc \
+    ../host_kernels/floordiv_kernel.cc \
+    ../host_kernels/floormod_kernel.cc \
+    ../host_kernels/gather_v2_kernel.cc  \
+    ../host_kernels/greater_kernel.cc \
+    ../host_kernels/kernel_utils.cc \
+    ../host_kernels/maximum_kernel.cc \
+    ../host_kernels/mul_kernel.cc \
+    ../host_kernels/pack_kernel.cc \
+    ../host_kernels/permute_kernel.cc \
+    ../host_kernels/range_kernel.cc \
+    ../host_kernels/rank_kernel.cc \
+    ../host_kernels/reduce_prod_kernel.cc \
+    ../host_kernels/reshape_kernel.cc \
+    ../host_kernels/rsqrt_kernel.cc \
+    ../host_kernels/shape_kernel.cc \
+    ../host_kernels/shape_n_kernel.cc \
+    ../host_kernels/size_kernel.cc \
+    ../host_kernels/slice_d_kernel.cc \
+    ../host_kernels/slice_kernel.cc \
+    ../host_kernels/squeeze_kernel.cc \
+    ../host_kernels/unsqueeze_kernel.cc \
+    ../host_kernels/ssd_prior_box_kernel.cc \
+    ../host_kernels/strided_slice_kernel.cc \
+    ../host_kernels/sub_kernel.cc \
+    ../host_kernels/transdata_kernel.cc \
+    ../host_kernels/unpack_kernel.cc \
+    ../graph/passes/pass_utils.cc \
+    ../graph/common/bcast.cc \
+    ../common/fp16_t.cc \
+    ../common/formats/format_transfers/format_transfer_transpose.cc \
+    ../common/formats/utils/formats_trans_utils.cc \
 
 local_ge_executor_c_include :=             \
     proto/insert_op.proto                  \
diff --git a/ge/ge_local_engine/CMakeLists.txt b/ge/ge_local_engine/CMakeLists.txt
index 76590172..615a968f 100755
--- a/ge/ge_local_engine/CMakeLists.txt
+++ b/ge/ge_local_engine/CMakeLists.txt
@@ -195,7 +195,7 @@ set_target_properties(atc_ge_local_opskernel_builder PROPERTIES
 )
 
 ############ libge_local_opskernel_builder.a ############
-add_library(ge_local_opskernel_builder_static SHARED ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
+add_library(ge_local_opskernel_builder_static STATIC ${OPS_KERNEL_SRC_LIST} ${PROTO_HDRS})
 
 target_compile_options(ge_local_opskernel_builder_static PRIVATE
     -Werror
diff --git a/ge/ge_local_engine/engine/host_cpu_engine.cc b/ge/ge_local_engine/engine/host_cpu_engine.cc
index b14cbb3d..c836d4d6 100755
--- a/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/ge/ge_local_engine/engine/host_cpu_engine.cc
@@ -95,8 +95,8 @@ Status GetDataNumber(const GeTensorDesc &out_desc, uint64_t &data_num) {
 
 void HostCpuEngine::CloseSo() {
   for (auto handle : lib_handles_) {
-    if (dlclose(handle) != 0) {
-      GELOGW("failed to close handle, message: %s", dlerror());
+    if (mmDlclose(handle) != 0) {
+      GELOGW("failed to close handle, message: %s", mmDlerror());
     }
   }
   lib_handles_.clear();
@@ -322,13 +322,13 @@ Status HostCpuEngine::LoadLibs(std::vector<std::string> &lib_paths) {
 
 Status HostCpuEngine::LoadLib(const std::string &lib_path) {
   GELOGI("To invoke dlopen on lib: %s", lib_path.c_str());
-  auto handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
+  auto handle = mmDlopen(lib_path.c_str(), MMPA_RTLD_NOW | MMPA_RTLD_GLOBAL);
   if (handle == nullptr) {
-    GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), dlerror());
+    GELOGE(INTERNAL_ERROR, "Failed to invoke dlopen. path = %s, error = %s", lib_path.c_str(), mmDlerror());
     return INTERNAL_ERROR;
   }
 
-  auto initialize = (Status (*)(const HostCpuContext &))dlsym(handle, "Initialize");
+  auto initialize = (Status (*)(const HostCpuContext &))mmDlsym(handle, "Initialize");
   if (initialize != nullptr) {
     GELOGI("Invoke function Initialize in lib: %s", lib_path.c_str());
     if (initialize(HostCpuContext()) != SUCCESS) {
diff --git a/ge/ge_local_engine/engine/host_cpu_engine.h b/ge/ge_local_engine/engine/host_cpu_engine.h
index cc6b578c..0b99ecac 100644
--- a/ge/ge_local_engine/engine/host_cpu_engine.h
+++ b/ge/ge_local_engine/engine/host_cpu_engine.h
@@ -20,7 +20,7 @@
 #include "framework/common/ge_inner_error_codes.h"
 #include "graph/node.h"
 #include "graph/operator.h"
-#include "register/register.h"
+#include "external/../register/register.h"
 
 namespace ge {
 class HostCpuEngine {
diff --git a/ge/graph/build/graph_builder.cc b/ge/graph/build/graph_builder.cc
index 0fa1e1ee..19c0083c 100644
--- a/ge/graph/build/graph_builder.cc
+++ b/ge/graph/build/graph_builder.cc
@@ -30,6 +30,7 @@
 #include "model/ge_model.h"
 #include "graph/ge_context.h"
 #include "opskernel_manager/ops_kernel_builder_manager.h"
+#include "graph/utils/op_desc_utils.h"
 
 using domi::BuildMode;
 
@@ -311,6 +312,53 @@ Status GraphBuilder::BuildForHostCpuGraph(ComputeGraphPtr &comp_graph, GeModelPt
   return BuildForUnknownShapeGraph(comp_graph, ge_model_ptr, session_id);
 }
 
+static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchorPtr &out_anchor,
+                               const std::vector<InDataAnchorPtr> &in_anchors, const std::string &name) {
+  GE_CHECK_NOTNULL(out_anchor);
+  NodePtr in_node = out_anchor->GetOwnerNode();
+  GE_CHECK_NOTNULL(in_node);
+  OpDescBuilder op_desc_builder(name, MEMCPYADDRASYNC);
+  OpDescPtr op_desc = op_desc_builder.AddInput("x", in_node->GetOpDesc()->GetOutputDesc(0))
+                                     .AddOutput("y", in_node->GetOpDesc()->GetOutputDesc(0))
+                                     .Build();
+  (void)AttrUtils::SetBool(op_desc, ATTR_NO_NEED_CONSTANT_FOLDING, false);
+  if (GraphUtils::InsertNodeAfter(out_anchor, in_anchors, graph->AddNode(op_desc)) != GRAPH_SUCCESS) {
+    GELOGE(FAILED, "Insert IDENTITY node %s after %s failed.", name.c_str(), in_node->GetName().c_str());
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
+  for (auto &node : graph->GetDirectNode()) {
+    // CONSTANT does not generate a task, so insert a MemcpyAddrAsync node between CONSTANT and NETOUTPUT
+    auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr) {
+      continue;
+    }
+    auto op_type = op_desc->GetType();
+    if (op_type == NETOUTPUT) {
+      for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
+        const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
+        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);
+        NodePtr in_node = peer_out_anchor->GetOwnerNode();
+        GE_CHECK_NOTNULL(in_node);
+
+        std::string in_node_op_type = in_node->GetType();
+        if (in_node_op_type == CONSTANT) {
+          GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str());
+          std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy";
+          if (InsertMemcpyNode(graph, peer_out_anchor, {in_data_anchor}, name) != SUCCESS) {
+            GELOGE(FAILED, "Insert memcpy between %s and %s failed.", in_node->GetName().c_str(), node->GetName().c_str());
+            return FAILED;
+          }
+        }
+      }
+    }
+  }
+  return SUCCESS;
+}
+
 Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
                                                std::vector<SubGraphInfoPtr> &subgraph_ptr_list,
                                                GeRootModelPtr &ge_root_model_ptr, GeModelPtr &ge_model_ptr,
@@ -332,6 +380,9 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
     if (sub_graph->GetParentGraph() != comp_graph && !sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
       continue;
     }
+
+    GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");
+
     if (sub_graph->GetGraphUnknownFlag()) {
       // unknown shape build flow
       GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),
diff --git a/ge/graph/load/graph_loader.cc b/ge/graph/load/graph_loader.cc
index 2eeecc0f..aa825a5d 100755
--- a/ge/graph/load/graph_loader.cc
+++ b/ge/graph/load/graph_loader.cc
@@ -274,13 +274,16 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
 /// @param [in] stream   stream to execute model on
 /// @param [in] async_mode  is asynchronize mode.
 /// @param [in] input_data  model input data
+/// @param [in] input_desc  description of model input data
 /// @param [out] output_data  model output data
+/// @param [out] output_desc  description of model output data
 ///
 Status GraphLoader::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                                 OutputData &output_data) {
+                                 const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                                 std::vector<GeTensorDesc> &output_desc) {
   auto model_manager = ModelManager::GetInstance();
   GE_CHECK_NOTNULL(model_manager);
-  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, output_data);
+  Status ret = model_manager->ExecuteModel(model_id, stream, async_mode, input_data, input_desc, output_data, output_desc);
   if (ret != SUCCESS) {
     GELOGE(ret, "Execute model failed, model_id:%u.", model_id);
     return ret;
diff --git a/ge/graph/load/graph_loader.h b/ge/graph/load/graph_loader.h
index b581f2fa..974af5c1 100755
--- a/ge/graph/load/graph_loader.h
+++ b/ge/graph/load/graph_loader.h
@@ -65,7 +65,8 @@ class GraphLoader {
                                const std::vector<uint32_t> &output_queue_ids);
 
   static Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                             OutputData &output_data);
+                             const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                             std::vector<GeTensorDesc> &output_desc);
 
   static Status DestroyAicpuKernel(uint64_t session_id, uint32_t model_id);
 
diff --git a/ge/graph/load/new_model_manager/davinci_model.cc b/ge/graph/load/new_model_manager/davinci_model.cc
index c660f797..37b1fb4f 100755
--- a/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/ge/graph/load/new_model_manager/davinci_model.cc
@@ -117,7 +117,8 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
       load_end_time_(0),
       time_info_(),
       dataInputTid(0),
-      is_model_has_inited_(false),
+      is_weight_mem_has_inited_(false),
+      is_feature_map_mem_has_inited_(false),
       model_id_(0),
       runtime_model_id_(0),
       version_(0),
@@ -263,34 +264,65 @@ void DavinciModel::Shrink() {
   ge_model_.reset();  // delete object.
 }
 
-Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_ptr, size_t weight_size) {
-  if (is_model_has_inited_) {
-    GELOGE(FAILED, "call InitModelMem more than once .");
+Status DavinciModel::InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size) {
+  if (is_weight_mem_has_inited_) {
+    GELOGE(FAILED, "call InitWeightMem more than once.");
     return FAILED;
   }
-  is_model_has_inited_ = true;
+  is_weight_mem_has_inited_ = true;
 
-  std::size_t data_size = TotalMemSize();
-  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
   const Buffer &weights = ge_model_->GetWeight();
   std::size_t weights_size = weights.GetSize();
   GE_CHECK_LE(weights_size, ALLOC_MEMORY_MAX_SIZE);
 
-  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
-    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
+  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
+    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
     return FAILED;
   }
 
-  if ((weight_ptr != nullptr) && (weight_size < weights_size)) {
-    GELOGE(FAILED, "Invalid mem param: weight_size=%zu totalsize=%zu.", weight_size, weights_size);
+  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
+  is_inner_weight_base_ = false;
+
+  if (weights_size != 0) {
+    weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
+    is_inner_weight_base_ = false;
+    if (weight_ptr == nullptr) {
+      weights_mem_base_ = MallocWeightsMem(weights_size);
+      if (weights_mem_base_ == nullptr) {
+        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
+        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
+      }
+      is_inner_weight_base_ = true;
+    }
+    GELOGI("[IMAS]InitWeightMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+           weights_mem_base_, weights_size);
+    GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
+    GELOGI("copy weights data to device");
+  }
+
+  runtime_param_.weight_base = weights_mem_base_;
+  return SUCCESS;
+}
+
+
+Status DavinciModel::InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size) {
+  if (is_feature_map_mem_has_inited_) {
+    GELOGE(FAILED, "call InitFeatureMapAndP2PMem more than once.");
+    return FAILED;
+  }
+  is_feature_map_mem_has_inited_ = true;
+
+  std::size_t data_size = TotalMemSize();
+  std::size_t p2p_data_size = P2PMemInfos().at(RT_MEMORY_P2P_DDR).memory_size;
+
+  if ((dev_ptr != nullptr) && (mem_size < TotalMemSize())) {
+    GELOGE(FAILED, "Invalid mem param: mem_size=%zu totalsize=%zu.", mem_size, TotalMemSize());
     return FAILED;
   }
 
   mem_base_ = static_cast<uint8_t *>(dev_ptr);
   p2p_mem_base_ = static_cast<uint8_t *>(dev_ptr);
-  weights_mem_base_ = static_cast<uint8_t *>(dev_ptr);
   is_inner_mem_base_ = false;
-  is_inner_weight_base_ = false;
 
   if (TotalMemSize() && mem_base_ == nullptr) {
     mem_base_ = MallocFeatureMapMem(data_size);
@@ -298,12 +330,14 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
       GELOGE(GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED, "Alloc feature map memory failed. size: %zu", data_size);
       return GE_EXEC_ALLOC_FEATURE_MAP_MEM_FAILED;
     }
-    GEEVENT("[IMAS]InitModelMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+    GEEVENT("[IMAS]InitFeatureMapAndP2PMem graph_%u MallocMemory type[F] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
             mem_base_, data_size);
-    weights_mem_base_ = mem_base_;
 
+    if (!is_inner_weight_base_ && (weights_mem_base_ == nullptr)) {  // keep a base InitWeightMem already set
+      weights_mem_base_ = mem_base_;
+      is_inner_weight_base_ = true;
+    }
     is_inner_mem_base_ = true;
-    is_inner_weight_base_ = true;
   }
 
   if (p2p_data_size != 0) {
@@ -312,27 +346,11 @@ Status DavinciModel::InitModelMem(void *dev_ptr, size_t mem_size, void *weight_p
       GELOGE(GE_EXEC_ALLOC_P2P_MEM_FAILED, "Alloc p2p memory failed,size: %zu", p2p_data_size);
       return GE_EXEC_ALLOC_P2P_MEM_FAILED;
     }
-    GELOGI("InitModelMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
+    GELOGI("InitFeatureMapAndP2PMem graph_%u MallocMemory type[P] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
            p2p_mem_base_, p2p_data_size);
     is_inner_p2p_mem_base_ = true;
   }
 
-  if (weights_size != 0) {
-    weights_mem_base_ = static_cast<uint8_t *>(weight_ptr);
-    is_inner_weight_base_ = false;
-    if (weight_ptr == nullptr) {
-      weights_mem_base_ = MallocWeightsMem(weights_size);
-      if (weights_mem_base_ == nullptr) {
-        GELOGE(GE_EXEC_ALLOC_WEIGHT_MEM_FAILED, "Alloc weight memory failed. size: %zu", weights_size);
-        return GE_EXEC_ALLOC_WEIGHT_MEM_FAILED;
-      }
-      is_inner_weight_base_ = true;
-    }
-    GELOGI("[IMAS]InitModelMem graph_%u MallocMemory type[W] memaddr[%p] mem_size[%zu]", runtime_param_.graph_id,
-           weights_mem_base_, weights_size);
-    GE_CHK_RT_RET(rtMemcpy(weights_mem_base_, weights_size, weights.GetData(), weights_size, RT_MEMCPY_HOST_TO_DEVICE));
-  }
-
   GE_CHK_STATUS_RET(InitVariableMem(), "Init variable memory failed.");
   runtime_param_.mem_base = mem_base_;
   runtime_param_.weight_base = weights_mem_base_;
@@ -642,8 +660,9 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
 
   GE_TIMESTAMP_START(InitModelMem);
   GELOGD("Known node is %d", known_node_);
+  GE_CHK_STATUS_RET_NOLOG(InitWeightMem(dev_ptr, weight_ptr, weight_size));
   if (!known_node_) {
-    GE_CHK_STATUS_RET_NOLOG(InitModelMem(dev_ptr, mem_size, weight_ptr, weight_size));
+    GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
     data_inputer_ = new (std::nothrow) DataInputer();
     GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
   }
@@ -1140,6 +1159,7 @@ Status DavinciModel::InitNetOutput(const NodePtr &node) {
     GE_IF_BOOL_EXEC(GetGearAndRealOutShapeInfo(input_count, op_desc) != SUCCESS,
                     GELOGE(PARAM_INVALID, "Failed to get gear and real out shape info."); return PARAM_INVALID;);
   }
+
   return SUCCESS;
 }
 
diff --git a/ge/graph/load/new_model_manager/davinci_model.h b/ge/graph/load/new_model_manager/davinci_model.h
index 893c3d49..650f19eb 100755
--- a/ge/graph/load/new_model_manager/davinci_model.h
+++ b/ge/graph/load/new_model_manager/davinci_model.h
@@ -584,7 +584,8 @@ class DavinciModel {
 
   Status SyncVarData();
 
-  Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize);
+  Status InitWeightMem(void *dev_ptr, void *weight_ptr, size_t weight_size);
+  Status InitFeatureMapAndP2PMem(void *dev_ptr, size_t mem_size);
 
   void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);
 
@@ -850,7 +851,9 @@ class DavinciModel {
   Status GetRealOutputSizeOfMerge(size_t input_index, const NodePtr &merge_node);
   Status GetGearAndRealOutShapeInfo(size_t input_count, const OpDescPtr &op_desc);
 
-  bool is_model_has_inited_;
+  bool is_weight_mem_has_inited_;
+  bool is_feature_map_mem_has_inited_;
+
   uint32_t model_id_;
   uint32_t runtime_model_id_;
   string name_;
diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index 080ca889..6f20f63d 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -31,6 +31,7 @@
 #include "model/ge_root_model.h"
 #include "graph/common/local_context.h"
 #include "common/formats/utils/formats_trans_utils.h"
+#include "hybrid/hybrid_davinci_model.h"
 
 namespace ge {
 thread_local uint32_t device_count = 0;
@@ -204,6 +205,13 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
 
 ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
   std::lock_guard<std::mutex> lock(map_mutex_);
+  auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
+  if (hybrid_davinci_model != hybrid_model_map_.end()) {
+    uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
+    DestroyAicpuSession(session_id);
+    return SUCCESS;
+  }
+
   auto it = model_map_.find(model_id);
   if (it == model_map_.end()) {
     GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
@@ -925,6 +933,12 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
                                             vector<InputOutputDescInfo> &output_desc,
                                             std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats,
                                             bool new_model_desc) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->SetModelDescVersion(new_model_desc);
+    return hybrid_davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats);
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
                          "GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
@@ -943,6 +957,11 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
 ///
 Status ModelManager::GetDynamicBatchInfo(const uint32_t model_id, std::vector<std::vector<int64_t>> &batch_info,
                                          int32_t &dynamic_type) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    return hybrid_davinci_model->GetDynamicBatchInfo(batch_info, dynamic_type);
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                          "GetDynamicBatchInfo failed, Invalid model id %u!", model_id);
@@ -975,6 +994,12 @@ Status ModelManager::GetCombinedDynamicDims(const uint32_t model_id, vector<vect
 ///
 Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
                                                 std::vector<std::string> &user_input_shape_order) {
+  auto hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->GetUserDesignateShapeOrder(user_input_shape_order);
+    return SUCCESS;
+  }
+
   auto davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
                          "GetUserDesignateShapeOrder Failed, Invalid Model ID %u!", model_id)
@@ -990,6 +1015,12 @@ Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &
 }
 
 Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynamic_output_shape_info) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    hybrid_davinci_model->GetModelAttr(dynamic_output_shape_info);
+    return SUCCESS;
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHECK_NOTNULL(davinci_model);
   davinci_model->GetModelAttr(dynamic_output_shape_info);
@@ -1201,10 +1232,25 @@ Status ModelManager::LoadModelWithQ(uint32_t &model_id, const ModelData &model_d
 /// @param [in] stream   model stream
 /// @param [in] async_mode  is asynchronize mode.
 /// @param [in] input_data  input data
+/// @param [in] input_desc  description of input data
 /// @param [out] output_data  output data
+/// @param [out] output_desc  description of output data
 ///
 Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                                  OutputData &output_data) {
+                                  const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                                  std::vector<GeTensorDesc> &output_desc) {
+  std::shared_ptr<hybrid::HybridDavinciModel> hybrid_davinci_model = GetHybridModel(model_id);
+  if (hybrid_davinci_model != nullptr) {
+    // Execute only reads the inputs, so bind to the blobs instead of copying the vector.
+    const auto &inputs = input_data.blobs;
+    auto outputs = output_data.blobs;  // NOTE(review): a copy, length updates made by Execute stay local; confirm
+    Status status = hybrid_davinci_model->Execute(inputs, input_desc, outputs, output_desc, stream);
+    if (status == SUCCESS) {
+      GELOGI("Execute model %u success.", model_id);
+    }
+    return status;
+  }
+
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "Invalid model id %u.", model_id);
 
diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h
index 9821a4ab..e3780d5b 100755
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -148,10 +148,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
   /// @param [in] stream   model stream
   /// @param [in] async_mode  is asynchronize mode.
   /// @param [in] input_data  model input data
+  /// @param [in] input_desc  description of model input data
   /// @param [out] output_data  model output data
+  /// @param [out] output_desc  description of model output data
   ///
   ge::Status ExecuteModel(uint32_t model_id, rtStream_t stream, bool async_mode, const InputData &input_data,
-                          OutputData &output_data);
+                          const std::vector<GeTensorDesc> &input_desc, OutputData &output_data,
+                          std::vector<GeTensorDesc> &output_desc);
 
   ge::Status SyncExecuteModel(uint32_t model_id, const std::vector<GeTensor> &inputs, std::vector<GeTensor> &outputs);
 
diff --git a/ge/graph/partition/dynamic_shape_partition.cc b/ge/graph/partition/dynamic_shape_partition.cc
index 87fac994..95f13b6f 100755
--- a/ge/graph/partition/dynamic_shape_partition.cc
+++ b/ge/graph/partition/dynamic_shape_partition.cc
@@ -26,6 +26,7 @@
 #include <vector>
 #include "common/ge/ge_util.h"
 #include "framework/common/debug/ge_log.h"
+#include "framework/common/debug/log.h"
 #include "framework/common/types.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/utils/graph_utils.h"
@@ -72,7 +73,7 @@ Status DynamicShapePartitioner::Partition() {
   }
   REQUIRE(AttrUtils::SetBool(*root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, true),
           "Failed set dynamic shape partitioned flag on root graph %s.", root_graph_->GetName().c_str());
-
+  REQUIRE_SUCCESS(CtrlEdgeTransfer(), "Failed do ctrl edge transfer!");
   DumpGraph("_Before_DSP");
   auto status = PartitionImpl();
   GELOGD("%s.", DebugString().c_str());
@@ -86,6 +87,50 @@ Status DynamicShapePartitioner::Partition() {
   return status;
 }
 
+// For every CONSTANT/CONSTANTOP node in the subgraphs of root_graph_, detaches each incoming control
+// edge and re-attaches its source to the nodes returned by GetOutNodes() of that constant.
+Status DynamicShapePartitioner::CtrlEdgeTransfer() {
+  GELOGD("Do ctrl edge transfer start!");
+  GE_CHECK_NOTNULL(root_graph_);
+  // Graphs without the dynamic-shape-partitioned attribute are left untouched.
+  bool partitioned = false;
+  (void)AttrUtils::GetBool(root_graph_, ATTR_NAME_DYNAMIC_SHAPE_PARTITIONED, partitioned);
+  if (!partitioned) {
+    return SUCCESS;
+  }
+  for (auto &subgraph : root_graph_->GetAllSubgraphs()) {
+    for (ge::NodePtr &node : subgraph->GetDirectNode()) {
+      auto op_desc = node->GetOpDesc();
+      if (op_desc == nullptr) {
+        continue;
+      }
+      auto op_type = op_desc->GetType();
+      if (op_type != CONSTANT && op_type != CONSTANTOP) {
+        continue;
+      }
+      if (node->GetInAllNodes().empty()) {
+        GELOGD("[CtrlEdgeTransferPass] node [%s] in nodes is empty", node->GetName().c_str());
+        continue;
+      }
+      GELOGD("start to tranfer ctrl edge for const node [%s]", node->GetName().c_str());
+      for (auto &in_control_node : node->GetInControlNodes()) {
+        GE_CHECK_NOTNULL(in_control_node);
+        GE_CHK_STATUS_RET(ge::GraphUtils::RemoveEdge(in_control_node->GetOutControlAnchor(),
+                                                     node->GetInControlAnchor()), "remove edge failed");
+        for (auto &consumer : node->GetOutNodes()) {
+          if (consumer != nullptr) {
+            GE_CHK_STATUS_RET(ge::GraphUtils::AddEdge(in_control_node->GetOutControlAnchor(),
+                                                      consumer->GetInControlAnchor()), "add edge failed.");
+          }
+        }
+      }
+    }
+  }
+
+  GELOGD("Do ctrl edge transfer end!");
+  return SUCCESS;
+}
+
 Status DynamicShapePartitioner::PartitionImpl() {
   REQUIRE_SUCCESS(root_graph_->TopologicalSorting(), "Graph topological sort failed.");
   REQUIRE_SUCCESS(InitClusters(), "Failed init cluster nodes.");
diff --git a/ge/graph/partition/dynamic_shape_partition.h b/ge/graph/partition/dynamic_shape_partition.h
index b0477ae8..9772615e 100644
--- a/ge/graph/partition/dynamic_shape_partition.h
+++ b/ge/graph/partition/dynamic_shape_partition.h
@@ -151,6 +151,7 @@ class DynamicShapePartitioner {
   Status IsUnknownShapeGraph(ge::ComputeGraphPtr graph, bool &is_unknow);
   Status IsUnknownShapeNode(ge::NodePtr node, bool &is_unknow);
   bool IsUnknownShapeTensor(const ge::GeTensorDesc &tensor);
+  Status CtrlEdgeTransfer();
   ge::ComputeGraphPtr root_graph_;                                        // The original graph to partition
   std::unordered_map<NodePtr, std::shared_ptr<Cluster>> node_2_cluster_;  // Record nodes and the cluster it belongs to
   // topological sorted clusters, this field will change with the splitting.
diff --git a/ge/graph/passes/pass_utils.cc b/ge/graph/passes/pass_utils.cc
index 5359ff63..3adfbde3 100644
--- a/ge/graph/passes/pass_utils.cc
+++ b/ge/graph/passes/pass_utils.cc
@@ -37,10 +37,6 @@
 #include "graph/utils/type_utils.h"
 
 namespace ge {
-namespace {
-const uint32_t kShapeDimSize = 1;
-const uint32_t DIM_SIZE_TWO = 2;
-}  // namespace
 
 Status PassUtils::ConstructTensorDescWithData(const GeTensorDesc &out_desc, std::vector<int64_t> &data,
                                               std::vector<GeTensorPtr> &v_output, const bool scalar_output) {
diff --git a/ge/graph/passes/transop_breadth_fusion_pass.cc b/ge/graph/passes/transop_breadth_fusion_pass.cc
index 21fb1eaf..689510f0 100644
--- a/ge/graph/passes/transop_breadth_fusion_pass.cc
+++ b/ge/graph/passes/transop_breadth_fusion_pass.cc
@@ -63,7 +63,7 @@ std::string TransOpBreadthFusionPass::GetNodeId(const int anchor_index, const No
   GE_IF_BOOL_EXEC(node == nullptr || node->GetOpDesc() == nullptr, GELOGE(FAILED, "node is null"); return "");
   if (node->GetType() == CAST) {
     trans_data_type = true;
-  } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED) {
+  } else if (node->GetType() == TRANSPOSE || node->GetType() == TRANSPOSED || node->GetType() == EXPANDDIMS) {
     trans_format = true;
     trans_shape = true;
   } else if (node->GetType() == TRANSDATA) {
diff --git a/ge/host_cpu_engine/CMakeLists.txt b/ge/host_cpu_engine/CMakeLists.txt
index 02b5f996..97b5a0f5 100644
--- a/ge/host_cpu_engine/CMakeLists.txt
+++ b/ge/host_cpu_engine/CMakeLists.txt
@@ -8,7 +8,7 @@ set(SRC_LIST
     "engine/host_cpu_engine.cc"
     "ops_kernel_store/host_cpu_ops_kernel_info.cc"
     "ops_kernel_store/op/op_factory.cc"
-    "ops_kernel_store/op/host_op.cc" 
+    "ops_kernel_store/op/host_op.cc"
 )
 
 set(CPU_OPS_KERNEL_LIST
@@ -98,7 +98,7 @@ target_link_libraries(atc_host_cpu_engine PRIVATE
 
 set_target_properties(atc_host_cpu_engine PROPERTIES
     OUTPUT_NAME host_cpu_engine
-    LIBRARY_OUTPUT_DIRECTORY atclib 
+    LIBRARY_OUTPUT_DIRECTORY atclib
 )
 
 ############ libhost_cpu_opskernel_builder.so ############
@@ -185,7 +185,7 @@ set_target_properties(atc_host_cpu_opskernel_builder PROPERTIES
 )
 
 ############ libhost_cpu_opskernel_builder.a ############
-add_library(host_cpu_opskernel_builder_static SHARED ${CPU_OPS_KERNEL_LIST})
+add_library(host_cpu_opskernel_builder_static STATIC ${CPU_OPS_KERNEL_LIST})
 
 target_compile_options(host_cpu_opskernel_builder_static PRIVATE
     -Werror
diff --git a/ge/host_kernels/floordiv_kernel.cc b/ge/host_kernels/floordiv_kernel.cc
index e254af09..df381212 100644
--- a/ge/host_kernels/floordiv_kernel.cc
+++ b/ge/host_kernels/floordiv_kernel.cc
@@ -112,8 +112,8 @@ void FloorDivKernel::ShapeCal(const std::vector<ge::ConstGeTensorPtr> &input, Ge
 template <typename T>
 T FloorDivKernel::DivCal(const T &x_i, const T &y_i) {
   if ((x_i < static_cast<T>(0)) != (y_i < static_cast<T>(0))) {
-    T abs_x_i = std::abs(x_i);
-    T abs_y_i = std::abs(y_i);
+    T abs_x_i = x_i < 0 ? -x_i : x_i;
+    T abs_y_i = y_i < 0 ? -y_i : y_i;
     return static_cast<T>(static_cast<int32_t>(-(abs_x_i + abs_y_i - 1) / abs_y_i));
   } else {
     return static_cast<T>(static_cast<int32_t>(x_i / y_i));
diff --git a/ge/host_kernels/floordiv_kernel.h b/ge/host_kernels/floordiv_kernel.h
index d3dc3ff7..b8f6dd12 100755
--- a/ge/host_kernels/floordiv_kernel.h
+++ b/ge/host_kernels/floordiv_kernel.h
@@ -40,10 +40,6 @@ class FloorDivKernel : public Kernel {
   template <typename T>
   Status DataCal(const std::vector<ConstGeTensorPtr> &input, ge::GeTensorPtr output_ptr);
   Status ComputeByDataType(DataType data_type, const std::vector<ConstGeTensorPtr> &input, GeTensorPtr output_ptr);
-
-  int64_t axis_dim_;
-  int64_t head_dim_;
-  int64_t end_dim_;
 };
 }  // namespace ge
 
diff --git a/ge/host_kernels/ssd_prior_box_kernel.cc b/ge/host_kernels/ssd_prior_box_kernel.cc
index b93a4047..57af4026 100644
--- a/ge/host_kernels/ssd_prior_box_kernel.cc
+++ b/ge/host_kernels/ssd_prior_box_kernel.cc
@@ -187,7 +187,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin
     return PARAM_INVALID;
   }
 
-  uint tmp_value = aspect_ratios_size * min_sizes_size;
+  uint32_t tmp_value = aspect_ratios_size * min_sizes_size;
   if (ge::CheckUint32AddOverflow(tmp_value, max_sizes_size) != SUCCESS) {
     GELOGW("Failed to get list param.");
     return PARAM_INVALID;
@@ -199,7 +199,7 @@ Status SsdPriorboxKernel::GetNumPriorAndDimSize(uint32_t aspect_ratios_size, uin
     return PARAM_INVALID;
   }
   num_priors = static_cast<int>(tmp_value);
-  
+
   if (ge::CheckIntMulOverflow(layer_width, layer_height) != SUCCESS) {
     GELOGW("Failed to get list param.");
     return PARAM_INVALID;
@@ -288,7 +288,7 @@ std::unique_ptr<float[]> SsdPriorboxKernel::BoundaryCalulate(int dim_size, int l
     }
   }
 
-  return std::move(output_data);
+  return output_data;
 }
 
 Status SsdPriorboxKernel::Compute(const NodePtr &node, std::vector<GeTensorPtr> &v_output) {
diff --git a/ge/hybrid/executor/hybrid_execution_context.h b/ge/hybrid/executor/hybrid_execution_context.h
index 0910d2c7..0fa5a5d7 100644
--- a/ge/hybrid/executor/hybrid_execution_context.h
+++ b/ge/hybrid/executor/hybrid_execution_context.h
@@ -77,7 +77,7 @@ do { \
   RECORD_PROFILING_EVENT((context), HybridProfiler::EXECUTION, fmt, "Execution", name,  ##__VA_ARGS__)
 
 #define RECORD_CALLBACK_EVENT(context, name, fmt, ...) \
-  RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACK, fmt, "Callback", name,  ##__VA_ARGS__)
+  RECORD_PROFILING_EVENT((context), HybridProfiler::CALLBACKS, fmt, "Callback", name,  ##__VA_ARGS__)
 }  // namespace hybrid
 }  // namespace ge
 #endif // GE_HYBRID_EXECUTOR_HYBRID_EXECUTION_CONTEXT_H_
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.cc b/ge/hybrid/executor/hybrid_model_async_executor.cc
index 468a7014..91996ab3 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_async_executor.cc
@@ -353,6 +353,60 @@ Status HybridModelAsyncExecutor::CopyOutputs(HybridModelExecutor::ExecuteArgs &a
   return SUCCESS;
 }
 
+// Synchronous execution on caller-provided buffers.
+//   inputs      [in]     buffers wrapped as TensorValue and handed to the executor
+//   input_desc  [in]     currently not consulted by this overload
+//   outputs     [in/out] destination buffers; length is rewritten to the size actually produced
+//   output_desc [out]    descs of the model outputs, appended in output order
+// Returns FAILED if there are fewer user buffers/descs than model outputs or a buffer is too small.
+Status HybridModelAsyncExecutor::Execute(const std::vector<DataBuffer> &inputs,
+                                         const std::vector<GeTensorDesc> &input_desc,
+                                         std::vector<DataBuffer> &outputs,
+                                         std::vector<GeTensorDesc> &output_desc) {
+  GELOGI("Start to execute model.");
+
+  HybridModelExecutor::ExecuteArgs args;
+  args.inputs.resize(inputs.size());
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    args.inputs[i] = TensorValue(inputs[i].data, inputs[i].length);
+  }
+  GE_CHK_STATUS_RET(executor_->Execute(args), "Failed to execute model.");
+
+  // Every index used below must exist: one user buffer and one desc per model output.
+  const size_t output_num = args.outputs.size();
+  if ((outputs.size() < output_num) || (args.output_desc.size() < output_num)) {
+    GELOGE(FAILED, "Output number mismatch, user buffers[%zu], output descs[%zu], model outputs[%zu].",
+           outputs.size(), args.output_desc.size(), output_num);
+    return FAILED;
+  }
+
+  // output_desc may already hold entries, so remember where the descs appended here begin.
+  const size_t desc_offset = output_desc.size();
+  for (const auto &output_tensor_desc : args.output_desc) {
+    output_desc.emplace_back(*output_tensor_desc);
+  }
+
+  for (size_t i = 0; i < output_num; ++i) {
+    int64_t output_real_size = 0;
+    ge::graphStatus graph_status = TensorUtils::GetTensorSizeInBytes(output_desc[desc_offset + i], output_real_size);
+    if (graph_status != GRAPH_SUCCESS) {
+      GELOGE(FAILED, "Get tensor size in bytes failed.");
+      return FAILED;
+    }
+    if (output_real_size > 0) {
+      if (outputs[i].length < static_cast<uint64_t>(output_real_size)) {
+        GELOGE(FAILED, "output idx[%zu], the memory size of output[%lu] given by user should be greater than or equal to the real size of output[%ld]",
+               i, outputs[i].length, output_real_size);
+        return FAILED;
+      }
+      GE_CHK_RT_RET(rtMemcpy(outputs[i].data, outputs[i].length, args.outputs[i].GetData(), output_real_size, RT_MEMCPY_DEVICE_TO_DEVICE));
+    }
+    outputs[i].length = output_real_size;
+  }
+
+  return SUCCESS;
+}
+
 Status HybridModelAsyncExecutor::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
   GELOGD("Start to execute model.");
   // prepare inputs
diff --git a/ge/hybrid/executor/hybrid_model_async_executor.h b/ge/hybrid/executor/hybrid_model_async_executor.h
index 8de2beb6..21833b0b 100644
--- a/ge/hybrid/executor/hybrid_model_async_executor.h
+++ b/ge/hybrid/executor/hybrid_model_async_executor.h
@@ -35,6 +35,11 @@ class HybridModelAsyncExecutor {
 
   Status Init();
 
+  Status Execute(const std::vector<DataBuffer> &inputs,
+                 const std::vector<GeTensorDesc> &input_desc,
+                 std::vector<DataBuffer> &outputs,
+                 std::vector<GeTensorDesc> &output_desc);
+
   Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);
 
   Status Start(const std::shared_ptr<ModelListener> &listener);
diff --git a/ge/hybrid/executor/hybrid_profiler.h b/ge/hybrid/executor/hybrid_profiler.h
index f6027a0b..94a042e4 100644
--- a/ge/hybrid/executor/hybrid_profiler.h
+++ b/ge/hybrid/executor/hybrid_profiler.h
@@ -33,7 +33,7 @@ class HybridProfiler {
     SHAPE_INFERENCE,
     COMPILE,
     EXECUTION,
-    CALLBACK
+    CALLBACKS
   };
 
   struct Event {
diff --git a/ge/hybrid/executor/node_state.h b/ge/hybrid/executor/node_state.h
index 48b2ed72..04f1ee4b 100644
--- a/ge/hybrid/executor/node_state.h
+++ b/ge/hybrid/executor/node_state.h
@@ -27,7 +27,7 @@
 namespace ge {
 namespace hybrid {
 class NodeTask;
-class GraphExecutionContext;
+struct GraphExecutionContext;
 class SubgraphContext;
 
 class ShapeFuture {
diff --git a/ge/hybrid/hybrid_davinci_model.cc b/ge/hybrid/hybrid_davinci_model.cc
index b6f5bb84..7009331c 100755
--- a/ge/hybrid/hybrid_davinci_model.cc
+++ b/ge/hybrid/hybrid_davinci_model.cc
@@ -38,6 +38,14 @@ class HybridDavinciModel::Impl {
     return SUCCESS;
   }
 
+  Status Execute(const std::vector<DataBuffer> &inputs,
+                 const std::vector<GeTensorDesc> &input_desc,
+                 std::vector<DataBuffer> &outputs,
+                 std::vector<GeTensorDesc> &output_desc,
+                 rtStream_t stream) {
+    return executor_.Execute(inputs, input_desc, outputs, output_desc);
+  }
+
   Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
     return executor_.Execute(inputs, outputs);
   }
@@ -68,6 +76,33 @@ class HybridDavinciModel::Impl {
     executor_.SetDeviceId(device_id);
   }
 
+  uint64_t GetSessionId() {
+    return model_.GetSessionId();
+  }
+
+  Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
+    return model_.GetDynamicBatchInfo(batch_info, dynamic_type);
+  }
+
+  void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
+    model_.GetUserDesignateShapeOrder(user_input_shape_order);
+  }
+
+  void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
+    model_.GetModelAttr(dynamic_output_shape_info);
+  }
+
+  Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
+                                vector<InputOutputDescInfo> &output_desc,
+                                std::vector<uint32_t> &input_formats,
+                                std::vector<uint32_t> &output_formats) {
+    return model_.GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats);
+  }
+
+  void SetModelDescVersion(bool is_new_model_desc) {
+    model_.SetModelDescVersion(is_new_model_desc);
+  }
+
  private:
   std::shared_ptr<ModelListener> listener_;
   HybridModel model_;
@@ -95,6 +130,14 @@ Status HybridDavinciModel::Init() {
   return impl_->Init();
 }
 
+Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs,
+                                   const std::vector<GeTensorDesc> &input_desc,
+                                   std::vector<DataBuffer> &outputs,
+                                   std::vector<GeTensorDesc> &output_desc, rtStream_t stream) {
+  GE_CHECK_NOTNULL(impl_);
+  return impl_->Execute(inputs, input_desc, outputs, output_desc, stream);
+}
+
 Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
   GE_CHECK_NOTNULL(impl_);
   return impl_->Execute(inputs, outputs);
@@ -132,5 +175,41 @@ void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
     impl_->SetDeviceId(device_id);
   }
 }
+
+Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
+  GE_CHECK_NOTNULL(impl_);
+  return impl_->GetDynamicBatchInfo(batch_info, dynamic_type);
+}
+
+void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
+  if (impl_ != nullptr) {
+    impl_->GetUserDesignateShapeOrder(user_input_shape_order);
+  }
+}
+
+void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
+  if (impl_ != nullptr) {
+    impl_->GetModelAttr(dynamic_output_shape_info);
+  }
+}
+
+Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
+                                                  vector<InputOutputDescInfo> &output_desc,
+                                                  std::vector<uint32_t> &input_formats,
+                                                  std::vector<uint32_t> &output_formats) {
+  GE_CHECK_NOTNULL(impl_);
+  return impl_->GetInputOutputDescInfo(input_desc, output_desc, input_formats, output_formats);
+}
+
+void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
+  if (impl_ != nullptr) {
+    impl_->SetModelDescVersion(is_new_model_desc);
+  }
+}
+
+uint64_t HybridDavinciModel::GetSessionId() {
+  // No GE_CHECK_NOTNULL here: its early return would hand back a Status code as if it were a session id.
+  return (impl_ != nullptr) ? impl_->GetSessionId() : 0U;
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/hybrid_davinci_model.h b/ge/hybrid/hybrid_davinci_model.h
index 00a48c1e..5349390c 100644
--- a/ge/hybrid/hybrid_davinci_model.h
+++ b/ge/hybrid/hybrid_davinci_model.h
@@ -37,6 +37,12 @@ class HybridDavinciModel {
 
   Status Init();
 
+  Status Execute(const std::vector<DataBuffer> &inputs,
+                 const std::vector<GeTensorDesc> &input_desc,
+                 std::vector<DataBuffer> &outputs,
+                 std::vector<GeTensorDesc> &output_desc,
+                 rtStream_t stream);
+
   Status Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);
 
   Status ModelRunStart();
@@ -51,6 +57,21 @@ class HybridDavinciModel {
 
   void SetDeviceId(uint32_t device_id);
 
+  uint64_t GetSessionId();
+
+  Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
+
+  void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order);
+
+  void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);
+
+  Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
+                                vector<InputOutputDescInfo> &output_desc,
+                                std::vector<uint32_t> &input_formats,
+                                std::vector<uint32_t> &output_formats);
+
+  void SetModelDescVersion(bool is_new_model_desc);
+
  private:
   HybridDavinciModel() = default;
   class Impl;
diff --git a/ge/hybrid/hybrid_davinci_model_stub.cc b/ge/hybrid/hybrid_davinci_model_stub.cc
index b95b9efc..366845c5 100644
--- a/ge/hybrid/hybrid_davinci_model_stub.cc
+++ b/ge/hybrid/hybrid_davinci_model_stub.cc
@@ -28,6 +28,14 @@ Status HybridDavinciModel::Init() {
   return UNSUPPORTED;
 }
 
+Status HybridDavinciModel::Execute(const std::vector<DataBuffer> &inputs,
+                                   const std::vector<GeTensorDesc> &input_desc,
+                                   std::vector<DataBuffer> &outputs,
+                                   std::vector<GeTensorDesc> &output_desc,
+                                   rtStream_t stream) {
+  return UNSUPPORTED;
+}
+
 Status HybridDavinciModel::Execute(const vector<GeTensor> &inputs, vector<GeTensor> &outputs) {
   return UNSUPPORTED;
 }
@@ -52,5 +60,29 @@ void HybridDavinciModel::SetModelId(uint32_t model_id) {
 
 void HybridDavinciModel::SetDeviceId(uint32_t device_id) {
 }
+
+uint64_t HybridDavinciModel::GetSessionId() {
+  return 0;
+}
+
+Status HybridDavinciModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
+  return UNSUPPORTED;
+}
+
+void HybridDavinciModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
+}
+
+void HybridDavinciModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
+}
+
+Status HybridDavinciModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
+                                                  vector<InputOutputDescInfo> &output_desc,
+                                                  std::vector<uint32_t> &input_formats,
+                                                  std::vector<uint32_t> &output_formats) {
+  return UNSUPPORTED;
+}
+
+void HybridDavinciModel::SetModelDescVersion(bool is_new_model_desc) {
+}
 }  // namespace hybrid
 }  // namespace ge
\ No newline at end of file
diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc
index 59c7be9a..c319b06b 100644
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -21,12 +21,18 @@
 #include "graph/utils/graph_utils.h"
 #include "graph/utils/node_utils.h"
 #include "graph/utils/tensor_utils.h"
+#include "graph/utils/type_utils.h"
 #include "hybrid/common/npu_memory_allocator.h"
 #include "hybrid/model/hybrid_model_builder.h"
 #include "hybrid/node_executor/node_executor.h"
+#include "common/op/ge_op_utils.h"
 
 namespace ge {
 namespace hybrid {
+namespace {
+const int64_t kMemSizeUnknownShape = -1; // Unknown shape mem size
+}
+
 HybridModel::HybridModel(GeRootModelPtr ge_model) : ge_root_model_(std::move(ge_model)) {
 }
 
@@ -128,7 +134,187 @@ const GraphItem *HybridModel::GetSubgraphItem(const ComputeGraphPtr &subgraph) c
 }
 
 const string &HybridModel::GetModelName() const {
-    return model_name_;
+  return model_name_;
+}
+
+Status HybridModel::GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type) {
+  // Dynamic-shape models do not need dynamic batch: report an empty batch list and dynamic type -1.
+  batch_info = {};
+  dynamic_type = -1;
+  return SUCCESS;
+}
+
+void HybridModel::GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order) {
+  // Dynamic-shape models do not need dynamic batch, so the user-designated shape order is returned empty.
+  user_input_shape_order = {};
+}
+
+void HybridModel::GetModelAttr(std::vector<std::string> &dynamic_output_shape_info) {
+  dynamic_output_shape_info = {};
+}
+
+Status HybridModel::GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
+                                           vector<InputOutputDescInfo> &output_desc,
+                                           std::vector<uint32_t> &input_formats,
+                                           std::vector<uint32_t> &output_formats) {
+  const auto &node_item_list = root_graph_item_->GetInputNodes();  // bound by reference: the list is only read
+  if (node_item_list.empty()) {
+    GELOGE(FAILED, "node item list is empty!");
+    return FAILED;
+  }
+  // Only the first input node is validated here: it needs a node, an op desc and exactly one input.
+  GE_CHECK_NOTNULL(node_item_list[0]->node);
+  GE_CHECK_NOTNULL(node_item_list[0]->node->GetOpDesc());
+  if (node_item_list[0]->node->GetOpDesc()->GetInputsSize() != 1) {
+    GELOGE(FAILED, "input size of op is not 1!");
+    return FAILED;
+  }
+
+  GE_CHK_STATUS_RET(GetInputDescInfo(input_desc, input_formats), "get input desc info failed");
+  GE_CHK_STATUS_RET(GetOutputDescInfo(output_desc, output_formats), "get output desc info failed");
+
+  return SUCCESS;
+}
+
+void HybridModel::SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims,
+                                                 std::vector<std::pair<int64_t, int64_t>> &shape_ranges,
+                                                 InputOutputDescInfo &input) {
+  // The dims are appended to whatever `input` already holds; the shape ranges replace the previous ones.
+  auto &dims = input.shape_info.dims;
+  dims.insert(dims.end(), model_input_dims.begin(), model_input_dims.end());
+  input.shape_info.shape_ranges = shape_ranges;
+}
+
+void HybridModel::CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input) {
+  std::vector<std::pair<int64_t,int64_t>> shape_ranges;  // stays empty unless the plain-shape branch fills it
+  if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) {
+    // When static AIPP is set, report the model input dims that have already been processed by AIPP.
+    vector<int64_t> model_input_dims;
+    (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims);
+    SetInputDimsAndShapeRangesInfo(model_input_dims, shape_ranges, input);
+    return;
+  }
+  // Next, check whether this data node is linked to dynamic AIPP (multiple batches are already accounted for).
+  if (op_desc->HasAttr("_dynamic_aipp_input_dims")) {
+    vector<int64_t> dynamic_aipp_input_dims;
+    (void)AttrUtils::GetListInt(op_desc, "_dynamic_aipp_input_dims", dynamic_aipp_input_dims);
+    SetInputDimsAndShapeRangesInfo(dynamic_aipp_input_dims, shape_ranges, input);
+    return;
+  } else {
+    vector<int64_t> input_dims = op_desc->GetInputDescPtr(0)->GetShape().GetDims();  // input desc 0 is not null-checked here
+    op_desc->GetInputDescPtr(0)->GetShapeRange(shape_ranges);
+    SetInputDimsAndShapeRangesInfo(input_dims, shape_ranges, input);
+    return;
+  }
+}
+
+Status HybridModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
+  const auto &node_item_list = root_graph_item_->GetInputNodes();  // bound by reference: the list is only read
+  for (auto &node_item : node_item_list) {
+    InputOutputDescInfo input;
+
+    GE_CHECK_NOTNULL(node_item->node);
+    auto op_desc = node_item->node->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+    GE_CHECK_NOTNULL(op_desc->GetInputDescPtr(0));
+
+    Format format = op_desc->GetInputDescPtr(0)->GetFormat();
+    input.data_type = op_desc->GetInputDescPtr(0)->GetDataType();
+    input.name = op_desc->GetName();
+
+    int64_t input_size = 0;
+    GE_CHK_STATUS_RET(TensorUtils::GetSize(*op_desc->GetInputDescPtr(0), input_size), "get input size failed.");
+
+    // support dynamic shape
+    if (input_size < 0) {
+      GELOGD("dynamic shape scene, input size is unknown. "
+             "format=%d, data_type=%d, input_size=%ld",
+             format, input.data_type, input_size);
+      input_size = kMemSizeUnknownShape;   // -1
+    }
+
+    // At this point input_size is either the size read above or kMemSizeUnknownShape (-1) for an unknown shape.
+    input.size = input_size;
+
+    CreateInputDimsInfo(op_desc, input);
+
+    formats.push_back(format);
+    input_desc.push_back(input);
+  }
+  is_new_model_desc_ = false;  // the flag is cleared after every query, so it applies to one call only
+  return SUCCESS;
+}
+
+// Fills `output_desc_info` (dims, shape ranges, size, data type) and `format_result` from `output_desc`.
+void HybridModel::CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output_desc_info,
+                               uint32_t &format_result) {
+  // Define the out-param before any early return so that callers never read an indeterminate value.
+  format_result = FORMAT_RESERVED;
+  GE_IF_BOOL_EXEC(output_desc == nullptr, GELOGE(FAILED, "output desc ptr is nullptr"); return );
+  Format format = output_desc->GetFormat();
+  GeShape shape = output_desc->GetShape();
+  std::vector<std::pair<int64_t,int64_t>> shape_ranges;
+  output_desc->GetShapeRange(shape_ranges);
+  DataType data_type = output_desc->GetDataType();
+  format_result = format;
+  auto &dims = output_desc_info.shape_info.dims;
+  if (format == FORMAT_FRACTAL_Z) {  // FraczToHWCK
+    // The source dims are read as [k, c, h, w] and reported in h, w, c, k order (FORMAT_HWCN).
+    dims.insert(dims.end(), {shape.GetDim(2), shape.GetDim(3), shape.GetDim(1), shape.GetDim(0)});
+    if (shape_ranges.size() == 4) {                   // 4 dims
+      output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[2]);  // h:2
+      output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[3]);  // w:3
+      output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[1]);  // c:1
+      output_desc_info.shape_info.shape_ranges.push_back(shape_ranges[0]);  // k:0
+    }
+    format_result = FORMAT_HWCN;
+  } else {
+    for (size_t j = 0; j < shape.GetDimNum(); j++) {
+      dims.push_back(shape.GetDim(j));
+    }
+    output_desc_info.shape_info.shape_ranges = shape_ranges;
+  }
+  // Best effort: the status is ignored, so on failure the size is whatever the call left in tensor_size.
+  int64_t tensor_size = 0;
+  (void)TensorUtils::CalcTensorMemSize(shape, format, data_type, tensor_size);
+  output_desc_info.size = static_cast<uint64_t>(tensor_size);
+  output_desc_info.data_type = data_type;
+}
+
+Status HybridModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats) {
+  std::vector<ConstGeTensorDescPtr> output_desc_list;
+  GE_CHK_STATUS_RET(root_graph_item_->GetOutputDescList(output_desc_list), "get output desc info failed");  // output_desc_list contains valid input desc
+
+  vector<std::string> out_node_names;
+  (void)ge::AttrUtils::GetListStr(ge_root_model_->GetRootGraph(), ATTR_MODEL_OUT_NODES_NAME, out_node_names);
+
+  GE_CHECK_NOTNULL(root_graph_item_->GetOutputNode());
+  auto op_desc = root_graph_item_->GetOutputNode()->op_desc;
+  GE_CHECK_NOTNULL(op_desc);
+
+  auto out_size = static_cast<uint32_t>(op_desc->GetInputsSize());
+  GE_CHK_BOOL_RET_STATUS(out_size == output_desc_list.size(), FAILED, "output size[%u] not match output_desc_list size[%zu]", out_size, output_desc_list.size());
+  // Loop-invariant, so fetched once. Both are indexed per output below and are bounds-checked before use.
+  const std::vector<std::string> src_name = op_desc->GetSrcName();
+  const std::vector<int64_t> src_index = op_desc->GetSrcIndex();
+  for (uint32_t index = 0; index < out_size; ++index) {
+    InputOutputDescInfo output_desc_info;
+    if (out_size == out_node_names.size()) {
+      bool contains_colon = out_node_names[index].find(":") != std::string::npos;
+      GE_CHK_BOOL_RET_STATUS(contains_colon || index < src_index.size(), FAILED,
+                             "src index of output[%u] is missing", index);
+      output_desc_info.name = contains_colon ? out_node_names[index] : out_node_names[index] + ":" + std::to_string(src_index[index]);
+    } else {
+      GE_CHK_BOOL_RET_STATUS(index < src_name.size() && index < src_index.size(), FAILED,
+                             "src name or src index of output[%u] is missing", index);
+      output_desc_info.name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" + std::to_string(src_index[index]);
+    }
+    uint32_t format_result = FORMAT_RESERVED;  // CreateOutput may return early without writing it
+    CreateOutput(output_desc_list[index], output_desc_info, format_result);
+    output_desc.push_back(output_desc_info);
+    formats.push_back(format_result);
+  }
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index 11311968..1bc08053 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -83,6 +83,30 @@ class HybridModel {
 
   const string &GetModelName() const;
 
+  Status GetDynamicBatchInfo(std::vector<std::vector<int64_t>> &batch_info, int32_t &dynamic_type);
+
+  void GetUserDesignateShapeOrder(std::vector<std::string> &user_input_shape_order);
+
+  void GetModelAttr(std::vector<std::string> &dynamic_output_shape_info);
+
+  Status GetInputOutputDescInfo(vector<InputOutputDescInfo> &input_desc,
+                                vector<InputOutputDescInfo> &output_desc,
+                                std::vector<uint32_t> &input_formats,
+                                std::vector<uint32_t> &outputFormats);
+
+  Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats);
+
+  void CreateOutput(ConstGeTensorDescPtr &output_desc, InputOutputDescInfo &output, uint32_t &format_result);
+
+  Status GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &formats);
+
+  void CreateInputDimsInfo(const OpDescPtr &op_desc, InputOutputDescInfo &input);
+
+  void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; }
+
+  void SetInputDimsAndShapeRangesInfo(const vector<int64_t> &model_input_dims, std::vector<std::pair<int64_t, int64_t>> &shape_ranges,
+                                      InputOutputDescInfo &input);
+
  private:
   friend class HybridModelBuilder;
   friend class HybridModelAsyncExecutor;
@@ -101,6 +125,8 @@ class HybridModel {
   std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_;
   std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;
 
+  bool is_new_model_desc_ = false;    // support aipp
+
   // runtime fields
   uint32_t device_id_ = 0;
   uint32_t model_id_ = 0;
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index cd4c0a83..d519c35b 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -27,16 +27,41 @@
 #include "graph/utils/graph_utils.h"
 #include "hybrid/common/npu_memory_allocator.h"
 #include "hybrid/node_executor/node_executor.h"
+#include "framework/common/debug/ge_log.h"
+#include "graph/utils/attr_utils.h"
 
 namespace ge {
 namespace hybrid {
 namespace {
 const uint32_t kSubgraphIndex = 0U;
 const uint32_t kVarOutputIndex = 0U;
-const uint32_t kAlignment = 32;
 const int kBytes = 8;
 const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown";
 
+Status SetOutputNameAttr(ComputeGraph &graph) {
+  vector<string> output_names;  // names of the nodes feeding NETOUTPUT, in the order their anchors are visited
+  for (const auto &node : graph.GetDirectNode()) {
+    auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr) {
+      continue;
+    }
+    auto op_type = op_desc->GetType();
+    if (op_type == NETOUTPUT) {  // only NETOUTPUT nodes contribute names
+      for (InDataAnchorPtr &in_data_anchor : node->GetAllInDataAnchors()) {
+        const OutDataAnchorPtr &peer_out_anchor = in_data_anchor->GetPeerOutAnchor();
+        GE_IF_BOOL_EXEC(peer_out_anchor == nullptr, continue);  // presumably skips inputs that have no peer
+        NodePtr in_node = peer_out_anchor->GetOwnerNode();
+        GE_CHECK_NOTNULL(in_node);
+        output_names.push_back(in_node->GetName());
+      }
+    }
+  }
+  GE_CHK_BOOL_EXEC(ge::AttrUtils::SetListStr(&graph, ATTR_MODEL_OUT_NODES_NAME, output_names),
+                   GELOGE(FAILED, "SetListStr of ATTR_MODEL_OUT_NODES_NAME failed.");
+                   return FAILED);
+  return SUCCESS;  // the collected names are now stored on the graph under ATTR_MODEL_OUT_NODES_NAME
+}
+
 int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) {
   int64_t var_size = 0;
   auto data_type = desc.GetDataType();
@@ -939,6 +964,10 @@ Status HybridModelBuilder::LoadGeModel(ComputeGraph &sub_graph, const GeModelPtr
 
 Status HybridModelBuilder::IndexTaskDefs() {
   const auto &root_graph = ge_root_model_->GetRootGraph();
+  if (SetOutputNameAttr(*root_graph) != SUCCESS) {
+    GELOGW("Set output name attr failed.");
+  }
+
   for (auto &it : ge_root_model_->GetSubgraphInstanceNameToModel()) {
     auto &name = it.first;
     auto &ge_model = it.second;
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 998afd02..80ea579b 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -19,6 +19,7 @@
 #include "framework/common/debug/log.h"
 #include "hybrid/executor/hybrid_execution_context.h"
 #include "hybrid/node_executor/aicore/aicore_task_builder.h"
+#include "graph/load/new_model_manager/tbe_handle_store.h"
 
 using optiling::OpRunInfo;
 
@@ -36,6 +37,58 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
   return SUCCESS;
 }
 
+Status AiCoreOpTask::RegisterTbeHandle(const OpDesc &op_desc) {
+  auto op_desc_ptr = std::make_shared<OpDesc>(op_desc);  // private copy, used below for attribute lookups
+  GE_CHECK_NOTNULL(op_desc_ptr);  // NOTE(review): std::make_shared throws rather than returning null - confirm this is needed
+  auto tbe_kernel = op_desc_ptr->TryGetExtAttr(OP_EXTATTR_NAME_TBE_KERNEL, TBEKernelPtr());
+  if (tbe_kernel == nullptr) {
+    GELOGE(INTERNAL_ERROR, "TBE: %s can't find tvm bin file!", op_desc_ptr->GetName().c_str());
+    return INTERNAL_ERROR;
+  }
+  TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
+  rtError_t rt_ret = rtQueryFunctionRegistered(stub_name_.c_str());
+  if (rt_ret != RT_ERROR_NONE) {  // any result other than RT_ERROR_NONE takes the registration path
+    void *bin_handle = nullptr;
+    if (!kernel_store.FindTBEHandle(stub_name_.c_str(), bin_handle)) {
+      GELOGI("TBE: can't find the kernel_name[%s] in HandleMap", stub_name_.c_str());
+      rtDevBinary_t binary;
+      std::string json_string;  // holds the TVM_ATTR_NAME_MAGIC attribute, compared against the magic names below
+      GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_MAGIC, json_string),
+                      GELOGI("Get original type of session_graph_id."));  // NOTE(review): log text does not match the attribute read
+      if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AICPU") {
+        binary.magic = RT_DEV_BINARY_MAGIC_ELF_AICPU;
+      } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF") {
+        binary.magic = RT_DEV_BINARY_MAGIC_ELF;
+      } else if (json_string == "RT_DEV_BINARY_MAGIC_ELF_AIVEC") {
+        binary.magic = RT_DEV_BINARY_MAGIC_ELF_AIVEC;
+      } else {
+        GELOGE(PARAM_INVALID, "TBE: Invalid parameter magic number! json: %s", json_string.c_str());
+        return PARAM_INVALID;
+      }
+      binary.version = 0;
+      binary.data = tbe_kernel->GetBinData();
+      binary.length = tbe_kernel->GetBinDataSize();
+      GELOGI("TBE: binary.length: %lu", binary.length);
+      GE_CHK_RT_RET(rtDevBinaryRegister(&binary, &bin_handle));
+      std::string meta_data;
+      GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, TVM_ATTR_NAME_METADATA, meta_data),
+                      GELOGI("Get original type of json_string"));  // NOTE(review): log text does not match the attribute read
+      GELOGI("TBE: meta data: %s", meta_data.empty() ? "null" : meta_data.c_str());
+      // NOTE(review): if the metadata registration below returns early, bin_handle looks registered but never stored - confirm
+      GE_IF_BOOL_EXEC(!meta_data.empty(), GE_CHK_RT_RET(rtMetadataRegister(bin_handle, meta_data.c_str())));
+      kernel_store.StoreTBEHandle(stub_name_.c_str(), bin_handle, tbe_kernel);  // stored under the stub name
+    } else {
+      GELOGI("TBE: find the kernel_name[%s] in HandleMap", stub_name_.c_str());
+      kernel_store.ReferTBEHandle(stub_name_.c_str());  // presumably records another user of the stored handle
+    }
+    std::string kernel_name;  // read from the "<op name>_kernelname" attribute; stays empty if it is absent
+    GE_IF_BOOL_EXEC(AttrUtils::GetStr(op_desc_ptr, op_desc_ptr->GetName() + "_kernelname", kernel_name),
+                    GELOGI("Get original type of kernel_name"));
+    GELOGI("TBE: binfile_key=%s, kernel_name=%s", stub_name_.c_str(), kernel_name.c_str());
+    GE_CHK_RT_RET(rtFunctionRegister(bin_handle, stub_name_.c_str(), stub_name_.c_str(), kernel_name.c_str(), 0));
+  }
+  return SUCCESS;
+}
+
 Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef &task_def) {
   GE_CHK_STATUS_RET(ValidateTaskDef(task_def),
                     "[%s] Failed to validate task def: [%s]",
@@ -45,6 +98,9 @@ Status AiCoreOpTask::InitWithTaskDef(const OpDesc &op_desc, const domi::TaskDef
   const domi::KernelDef &kernel_def = task_def.kernel();
   const domi::KernelContext &context = kernel_def.context();
   stub_name_ = kernel_def.stub_func();
+
+  GE_CHK_STATUS_RET(RegisterTbeHandle(op_desc));
+
   GE_CHK_RT_RET(rtGetFunctionByName(stub_name_.c_str(), &stub_func_));
   args_size_ = kernel_def.args_size();
   block_dim_ = kernel_def.block_dim();
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index 0447ade7..5818f384 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -62,6 +62,7 @@ class AiCoreOpTask {
   static Status ValidateTaskDef(const domi::TaskDef &task_def);
   Status InitWithTaskDef(const OpDesc &node, const domi::TaskDef &task_def);
   Status InitTilingInfo(const OpDesc &op_desc);
+  Status RegisterTbeHandle(const OpDesc &op_desc);
 
   std::string stub_name_;
   void *stub_func_ = nullptr;
diff --git a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h
index bf948349..b6dfd82b 100755
--- a/ge/hybrid/node_executor/aicore/aicore_task_compiler.h
+++ b/ge/hybrid/node_executor/aicore/aicore_task_compiler.h
@@ -26,7 +26,7 @@ namespace hybrid {
 class AiCoreTaskCompiler : public TaskCompiler {
  public:
   AiCoreTaskCompiler() = default;
-  ~AiCoreTaskCompiler() = default;
+  ~AiCoreTaskCompiler() override = default;
 
   Status CompileOp(const NodePtr &node, std::vector<domi::TaskDef> &tasks) override;
   Status Initialize() override;
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
index b984cc86..1205b190 100644
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.h
@@ -37,6 +37,8 @@ class AicpuNodeTaskBase : public NodeTask {
 
   ~AicpuNodeTaskBase() override = default;
 
+  using NodeTask::Init;
+
   virtual Status Init(const HybridModel &model) = 0;
 
   Status UpdateArgs(TaskContext &context) override;
diff --git a/ge/hybrid/node_executor/controlop/control_op_executor.h b/ge/hybrid/node_executor/controlop/control_op_executor.h
index 7520afd1..3becfaaa 100644
--- a/ge/hybrid/node_executor/controlop/control_op_executor.h
+++ b/ge/hybrid/node_executor/controlop/control_op_executor.h
@@ -25,6 +25,7 @@ namespace ge {
 namespace hybrid {
 class ControlOpNodeTask : public NodeTask {
  public:
+  using NodeTask::Init;
   virtual Status Init(const NodePtr &node, const HybridModel &model) = 0;
   Status UpdateArgs(TaskContext &context) override;
 
diff --git a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
index 7a83641d..a52e5670 100755
--- a/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
+++ b/ge/hybrid/node_executor/ge_local/ge_local_node_executor.cc
@@ -68,7 +68,7 @@ Status RefInputTask::RefOneByOne(TaskContext &context) {
            node_name_.c_str(), node_type_.c_str(), output_num, input_num);
     return INTERNAL_ERROR;
   }
-  for (uint32_t out_index = 0; out_index < output_num; ++out_index) {
+  for (uint32_t out_index = 0; out_index < static_cast<uint32_t>(output_num); ++out_index) {
     auto input = context.GetInput(out_index);
     GE_CHECK_NOTNULL(input);
     GE_CHK_STATUS_RET(context.SetOutput(out_index, *input));
diff --git a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc
index 3bf71013..01fd391d 100644
--- a/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc
+++ b/ge/hybrid/node_executor/host_cpu/kernel/assign_kernel.cc
@@ -20,7 +20,6 @@
 #include "hybrid/node_executor/host_cpu/kernel_factory.h"
 
 namespace {
-const size_t kAssignInputNum = 2;
 const size_t kAssignRefInputIndex = 0;
 const size_t kAssignValueInputIndex = 1;
 const size_t kAssignRefOutputIndex = 0;
diff --git a/ge/hybrid/node_executor/node_executor.cc b/ge/hybrid/node_executor/node_executor.cc
index e577f09b..95e50c31 100755
--- a/ge/hybrid/node_executor/node_executor.cc
+++ b/ge/hybrid/node_executor/node_executor.cc
@@ -34,7 +34,6 @@ const char *const kEngineNameAiCpuTf = "aicpu_tf_kernel";
 const char *const kEngineNameHccl = "ops_kernel_info_hccl";
 const char *const kEngineNameRts = "DNN_VM_RTS_OP_STORE";
 const char *const kEngineNameHostCpu = "DNN_VM_HOST_CPU_OP_STORE";
-const char *const kOwnerGraphIsUnknown = "OwnerGraphIsUnknown";
 }
 Status NodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
   GE_CHK_STATUS_RET_NOLOG(context.AllocateOutputs());
diff --git a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h
index 9ea544a1..73873002 100644
--- a/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h
+++ b/ge/hybrid/node_executor/partitioned_call/partitioned_call_node_executor.h
@@ -41,7 +41,6 @@ class PartitionedCallNodeTask : public NodeTask {
 
   const GraphItem *graph_item_;
   std::unique_ptr<SubgraphExecutor> subgraph_executor_;
-  GraphExecutionContext *context_ = nullptr;
 };
 
 class PartitionedCallNodeExecutor : public NodeExecutor {
diff --git a/ge/hybrid/node_executor/task_context.h b/ge/hybrid/node_executor/task_context.h
index 2cff0536..0549a1dc 100644
--- a/ge/hybrid/node_executor/task_context.h
+++ b/ge/hybrid/node_executor/task_context.h
@@ -29,7 +29,7 @@
 
 namespace ge {
 namespace hybrid {
-class GraphExecutionContext;
+struct GraphExecutionContext;
 class SubgraphContext;
 
 class TaskContext {
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index 17dbf928..5a73126f 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -234,6 +234,22 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
   ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &input_data,
                        ge::RunModelData &output_data, bool async_mode = false);
 
+  ///
+  /// @ingroup ge
+  /// @brief Synchronous execution of offline model(Do not create thread)
+  /// @param [in] uint32_t model_id: Model ID to execute
+  /// @param [in] void* stream: stream to execute
+  /// @param [in] bool async_mode: whether to execute asynchronously (defaults to false)
+  /// @param [in] const ge::RunModelData &run_input_data: Model input data
+  /// @param [in] const std::vector<GeTensorDesc> &input_desc: description of model input data
+  /// @param [out] ge::RunModelData &run_output_data: Model output data
+  /// @param [out] std::vector<GeTensorDesc> &output_desc: description of model output data
+  /// @return SUCCESS handle successfully / others handle failed
+  ///
+  ge::Status ExecModel(uint32_t model_id, void *stream, const ge::RunModelData &run_input_data,
+                       const std::vector<GeTensorDesc> &input_desc, ge::RunModelData &run_output_data,
+                       std::vector<GeTensorDesc> &output_desc, bool async_mode = false);
+
   ///
   /// @ingroup ge
   /// @brief Get weight memory size from model file

From c588b7029c088c0c488c23dbdea47f81a2e4fa0e Mon Sep 17 00:00:00 2001
From: taoxudonghaha <justsheldon@163.com>
Date: Mon, 7 Dec 2020 20:18:32 +0800
Subject: [PATCH 7/9] modify fwk_atc.bin

---
 ge/offline/CMakeLists.txt | 4 ++--
 ge/offline/module.mk      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ge/offline/CMakeLists.txt b/ge/offline/CMakeLists.txt
index b3a0d53c..2f9195bc 100644
--- a/ge/offline/CMakeLists.txt
+++ b/ge/offline/CMakeLists.txt
@@ -183,11 +183,11 @@ target_link_libraries(fwk_atc.bin PRIVATE
     c_sec
     graph
     error_manager
-    ge_compiler
+    ge_runner
     parser_common
     gflags
     json
-    runtime_compile
+    runtime
     slog
     static_mmpa
     -lrt
diff --git a/ge/offline/module.mk b/ge/offline/module.mk
index c14be50f..8018266a 100755
--- a/ge/offline/module.mk
+++ b/ge/offline/module.mk
@@ -149,8 +149,8 @@ LOCAL_SHARED_LIBRARIES := \
     libgraph \
     libregister \
     liberror_manager \
-    libge_compiler \
-    libruntime_compile \
+    libge_runner \
+    libruntime \
     libparser_common \
     liberror_manager \
 

From 7fa1ca9237d095b57f05ac39870fcbd051d2460e Mon Sep 17 00:00:00 2001
From: "gengchao4@huawei.com" <gengchao4@huawei.com>
Date: Mon, 7 Dec 2020 20:39:54 +0800
Subject: [PATCH 8/9] unify mutex for model_aicpu_kernel_ in different func

---
 ge/graph/load/new_model_manager/model_manager.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index 080ca889..74c37a1b 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -216,7 +216,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
 
 ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
   GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
-  std::lock_guard<std::mutex> lock(sess_ids_mutex_);
+  std::lock_guard<std::mutex> lock(map_mutex_);
   std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
   if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
     Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
@@ -229,7 +229,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
 }
 
 ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
-  std::lock_guard<std::mutex> lock(sess_ids_mutex_);
+  std::lock_guard<std::mutex> lock(map_mutex_);
   std::vector<uint64_t> v_aicpu_kernel;
   std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
   if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {

From 8291221f64bfebc1c572a1410899830594de9d16 Mon Sep 17 00:00:00 2001
From: weiyang <yangwei79@huawei.com>
Date: Sat, 5 Dec 2020 15:07:17 +0800
Subject: [PATCH 9/9] fix cust aicpu

---
 ge/graph/load/new_model_manager/model_manager.cc   | 14 ++++++++++----
 ge/graph/load/new_model_manager/model_manager.h    |  2 +-
 .../task_info/kernel_task_info.cc                  |  4 +++-
 .../task_info/super_kernel/super_kernel.cc         |  2 +-
 .../node_executor/aicpu/aicpu_node_executor.cc     |  8 ++++++--
 ge/single_op/task/aicpu_kernel_task_builder.cc     |  8 ++++++--
 6 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/ge/graph/load/new_model_manager/model_manager.cc b/ge/graph/load/new_model_manager/model_manager.cc
index 74c37a1b..5d9b6e65 100755
--- a/ge/graph/load/new_model_manager/model_manager.cc
+++ b/ge/graph/load/new_model_manager/model_manager.cc
@@ -1243,8 +1243,8 @@ Status ModelManager::CreateAicpuSession(uint64_t session_id) {
   return SUCCESS;
 }
 
-Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name) {
-  GELOGI("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str());
+Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded) {
+  GELOGD("LoadCustAicpuSo in, op name %s, so name %s", op_desc->GetName().c_str(), so_name.c_str());
   std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
   CustAICPUKernelPtr aicpu_kernel = op_desc->TryGetExtAttr(OP_EXTATTR_CUSTAICPU_KERNEL, CustAICPUKernelPtr());
   if (aicpu_kernel == nullptr) {
@@ -1267,18 +1267,24 @@ Status ModelManager::LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_
     std::map<string, CustAICPUKernelPtr> new_so_name;
     new_so_name.insert({so_name, aicpu_kernel});
     cust_aicpu_so_[resource_id] = new_so_name;
-    GELOGI("LoadCustAicpuSo new aicpu so resource id %lu", resource_id);
+    loaded = false;
+    GELOGD("LoadCustAicpuSo new aicpu so name %s, resource id %lu", so_name.c_str(), resource_id);
     return SUCCESS;
   }
   auto it_so_name = it->second.find(so_name);
   if (it_so_name == it->second.end()) {
     it->second.insert({so_name, aicpu_kernel});
-    GELOGI("LoadCustAicpuSo add aicpu so resource id %lu", resource_id);
+    loaded = false;
+    GELOGD("LoadCustAicpuSo add aicpu so name %s, resource id %lu", so_name.c_str(), resource_id);
+    return SUCCESS;
   }
+  loaded = true;
+  GELOGD("LoadCustAicpuSo so name %s has been loaded.", so_name.c_str());
   return SUCCESS;
 }
 
 Status ModelManager::LaunchKernelCustAicpuSo(const string &kernel_name) {
+  GELOGD("Aicpu kernel launch task in, kernel name %s.", kernel_name.c_str());
   std::lock_guard<std::mutex> lock(cust_aicpu_mutex_);
   if (cust_aicpu_so_.size() == 0) return SUCCESS;
   // get current context
diff --git a/ge/graph/load/new_model_manager/model_manager.h b/ge/graph/load/new_model_manager/model_manager.h
index 9821a4ab..c1faed82 100755
--- a/ge/graph/load/new_model_manager/model_manager.h
+++ b/ge/graph/load/new_model_manager/model_manager.h
@@ -286,7 +286,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
 
   ge::Status DestroyAicpuSessionForInfer(uint32_t model_id);
 
-  ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name);
+  ge::Status LoadCustAicpuSo(const OpDescPtr &op_desc, const string &so_name, bool &loaded);
 
   ge::Status LaunchCustAicpuSo();
 
diff --git a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
index 3e3a715d..7b11c53e 100755
--- a/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -875,7 +875,9 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
   }
 
   if (kernel_type_ == ccKernelType::CUST_AI_CPU) {
-    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_), "launch cust aicpu so failed");
+    bool loaded = false;
+    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name_, loaded),
+            "launch cust aicpu so failed");
   }
 
   // copy args to new host memory
diff --git a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
index e94fa425..a4d14fb0 100644
--- a/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
+++ b/ge/graph/load/new_model_manager/task_info/super_kernel/super_kernel.cc
@@ -25,7 +25,7 @@ Status SuperKernel::Launch(rtStream_t stream, uint32_t dump_flag) {
   const void *args[] = {this->GetNavTablePtr(),
                         reinterpret_cast<const void *>(static_cast<uintptr_t>(this->GetNavTableSize()))};
 
-  rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(device_args_addr_), sizeof(args), RT_MEMORY_HBM);
+  rtError_t rt_ret = rtMalloc(reinterpret_cast<void **>(&device_args_addr_), sizeof(args), RT_MEMORY_HBM);
   GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "rtMalloc failied. error: 0x%X", rt_ret);
                   return RT_ERROR_TO_GE_STATUS(rt_ret);)
   rt_ret = rtMemcpy(reinterpret_cast<void *>(device_args_addr_), sizeof(args), (void *)args, sizeof(args),
diff --git a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
index 573739bc..38407160 100755
--- a/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
+++ b/ge/hybrid/node_executor/aicpu/aicpu_node_executor.cc
@@ -644,8 +644,12 @@ Status AicpuNodeTask::Init(const HybridModel &model) {
   const auto &context = kernel_def.context();
   auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
   if (kernel_type == ccKernelType::CUST_AI_CPU) {
-    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name), "load cust aicpu so failed.");
-    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
+    bool loaded = false;
+    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc, so_name, loaded),
+            "load cust aicpu so failed.");
+    if (!loaded) {
+      GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "Launch cust aicpu so failed.");
+    }
   }
 
   GE_CHK_BOOL_RET_STATUS(args.size() == args_size_, FAILED,
diff --git a/ge/single_op/task/aicpu_kernel_task_builder.cc b/ge/single_op/task/aicpu_kernel_task_builder.cc
index 600c9c29..0b459e7a 100755
--- a/ge/single_op/task/aicpu_kernel_task_builder.cc
+++ b/ge/single_op/task/aicpu_kernel_task_builder.cc
@@ -62,8 +62,12 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task, uint64_t kernel_id) {
   if (kernel_type == ccKernelType::CUST_AI_CPU) {
     task.is_custom_ = true;
     task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU;
-    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed");
-    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
+    bool loaded = false;
+    GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name, loaded),
+            "launch cust aicpu so failed");
+    if (!loaded) {
+      GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed.");
+    }
   }
 
   task.num_inputs_ = op_desc_->GetInputsSize();