From ddc6c58a3bf7fb2095fd98b945e92923e1c6ccad Mon Sep 17 00:00:00 2001
From: yanghaoran <yanghaoran2@huawei.com>
Date: Thu, 17 Sep 2020 19:04:09 +0800
Subject: [PATCH] Synchronize latest Ascend software suite on r0.5 17 Sep 2020

---
 inc/external/ge/ge_api_types.h                |   5 -
 inc/external/graph/operator.h                 |   2 +
 inc/external/register/register.h              |   4 +
 inc/framework/common/ge_types.h               |   1 +
 inc/framework/common/helper/model_helper.h    |   2 +
 inc/framework/executor/ge_executor.h          |   2 +-
 inc/framework/omg/omg.h                       |   6 +-
 inc/framework/omg/omg_inner_types.h           |   2 +
 inc/graph/debug/ge_attr_define.h              |   4 +
 inc/graph/shape_refiner.h                     |   1 +
 src/common/graph/ge_attr_define.cc            |   3 +
 src/common/graph/graph.mk                     |  51 +-
 src/common/graph/node.cc                      |   1 +
 src/common/graph/op_desc.cc                   |  10 +-
 src/common/graph/operator.cc                  |   7 +-
 src/common/graph/shape_refiner.cc             |   4 +
 src/common/graph/stub/Makefile                |   6 +
 src/common/graph/stub/gen_stubapi.py          | 573 ++++++++++++++++++
 src/ge/common/ge/tbe_plugin_manager.cc        |  34 +-
 src/ge/common/ge/tbe_plugin_manager.h         |   1 -
 src/ge/common/helper/model_helper.cc          |  35 +-
 src/ge/common/model_parser/base.cc            |   5 +-
 src/ge/common/profiling/profiling_manager.cc  |  59 +-
 src/ge/common/profiling/profiling_manager.h   |   5 +
 src/ge/common/properties_manager.cc           |   7 +-
 src/ge/common/properties_manager.h            |   2 +-
 src/ge/executor/ge_executor.cc                |   7 +-
 src/ge/ge_inference.mk                        |  24 +-
 .../ge_local_engine/engine/host_cpu_engine.cc |   1 +
 src/ge/generator/ge_generator.cc              |  48 +-
 .../graph/build/memory/block_mem_assigner.cc  |  12 +-
 .../graph/build/memory/block_mem_assigner.h   |   2 +
 .../graph/build/memory/graph_mem_assigner.cc  |   1 +
 src/ge/graph/execute/graph_execute.cc         |   6 +-
 src/ge/graph/execute/graph_execute.h          |   2 +-
 .../load/new_model_manager/data_dumper.cc     |  77 ++-
 .../load/new_model_manager/data_dumper.h      |   9 +-
 .../load/new_model_manager/davinci_model.cc   | 192 +++---
 .../load/new_model_manager/davinci_model.h    |  20 +-
 .../load/new_model_manager/model_manager.cc   |  12 +-
 .../load/new_model_manager/model_manager.h    |   2 +-
 .../task_info/end_graph_task_info.cc          |   3 +-
 .../task_info/kernel_ex_task_info.cc          |   3 +-
 .../task_info/kernel_task_info.cc             |  12 +-
 src/ge/graph/manager/graph_manager.cc         |   6 +-
 src/ge/graph/manager/graph_var_manager.cc     |   2 +-
 src/ge/graph/partition/graph_partition.cc     |   8 +-
 .../same_transdata_breadth_fusion_pass.cc     |   1 +
 .../transop_without_reshape_fusion_pass.cc    |   1 +
 src/ge/graph/preprocess/graph_preprocess.cc   | 131 +---
 .../graph/preprocess/insert_op/ge_aipp_op.cc  |   4 +-
 .../insert_op/util_insert_aipp_op.cc          |  92 ++-
 .../insert_op/util_insert_aipp_op.h           |   2 +
 .../preprocess/multi_batch_copy_graph.cc      |  19 +-
 src/ge/host_kernels/concat_v2_kernel.cc       |  53 +-
 src/ge/host_kernels/concat_v2_kernel.h        |   2 +-
 src/ge/init/gelib.cc                          |  25 +-
 src/ge/offline/main.cc                        |  48 +-
 src/ge/offline/single_op_parser.cc            |   8 -
 src/ge/session/omg.cc                         |  67 +-
 src/ge/single_op/single_op_manager.cc         |  15 +-
 src/ge/stub/Makefile                          |   6 +
 src/ge/stub/README                            |   4 +
 src/ge/stub/gen_stubapi.py                    | 573 ++++++++++++++++++
 src/proto/fusion_model.proto                  |   3 +-
 tests/st/resnet50/common.cc                   |   0
 .../graph/passes/flow_ctrl_pass_unittest.cc   |   0
 .../expanddims_kernel_unittest.cc             |   0
 .../ut/ge/graph/passes/merge_pass_unittest.cc |   0
 .../graph/passes/net_output_pass_unittest.cc  |   0
 .../ge/graph/passes/snapshot_pass_unittest.cc |   0
 .../single_op/single_op_manager_unittest.cc   |   0
 .../ge/single_op/single_op_model_unittest.cc  |   0
 .../inc/ops/elewise_calculation_ops.h         |  11 +-
 third_party/fwkacllib/inc/ops/image_ops.h     |  11 +-
 .../inc/ops/matrix_calculation_ops.h          |  40 +-
 .../fwkacllib/inc/ops/nn_batch_norm_ops.h     |  12 +-
 .../fwkacllib/inc/ops/nn_calculation_ops.h    |  69 +--
 third_party/fwkacllib/inc/ops/nn_detect_ops.h |   2 +-
 .../fwkacllib/inc/ops/nn_pooling_ops.h        |  25 +-
 .../fwkacllib/inc/ops/nn_training_ops.h       |  20 +-
 .../fwkacllib/inc/ops/nonlinear_fuc_ops.h     |   6 +-
 third_party/fwkacllib/inc/ops/quantize_ops.h  |  18 +-
 third_party/fwkacllib/inc/ops/selection_ops.h |  21 +-
 .../fwkacllib/inc/ops/transformation_ops.h    |  37 +-
 .../fwkacllib/inc/register/op_registry.h      |   3 +
 third_party/fwkacllib/inc/runtime/context.h   |   8 +
 third_party/fwkacllib/inc/toolchain/slog.h    | 107 +++-
 88 files changed, 2123 insertions(+), 602 deletions(-)
 create mode 100644 src/common/graph/stub/Makefile
 create mode 100644 src/common/graph/stub/gen_stubapi.py
 create mode 100644 src/ge/stub/Makefile
 create mode 100644 src/ge/stub/README
 create mode 100644 src/ge/stub/gen_stubapi.py
 mode change 100755 => 100644 tests/st/resnet50/common.cc
 mode change 100755 => 100644 tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc
 mode change 100755 => 100644 tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc
 mode change 100755 => 100644 tests/ut/ge/graph/passes/merge_pass_unittest.cc
 mode change 100755 => 100644 tests/ut/ge/graph/passes/net_output_pass_unittest.cc
 mode change 100755 => 100644 tests/ut/ge/graph/passes/snapshot_pass_unittest.cc
 mode change 100755 => 100644 tests/ut/ge/single_op/single_op_manager_unittest.cc
 mode change 100755 => 100644 tests/ut/ge/single_op/single_op_model_unittest.cc

diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h
index 5a8482e7..1632f11c 100644
--- a/inc/external/ge/ge_api_types.h
+++ b/inc/external/ge/ge_api_types.h
@@ -204,9 +204,6 @@ const std::string SAVE_ORIGINAL_MODEL = "ge.saveOriginalModel";
 // Save original model file name
 const std::string ORIGINAL_MODEL_FILE = "ge.originalModelFile";
 
-// FE enable quant optimize
-const std::string QUANT_OPTIMIZE = "ge.quantOptimize";
-
 const char *const OPTION_GE_MAX_DUMP_FILE_NUM = "ge.maxDumpFileNum";
 const char *const OPTION_GE_MAX_DUMP_FILE_SIZE = "ge.maxDumpFileSize";
 const char *const OPTION_GE_MAX_DUMP_OP_NUM = "ge.maxDumpOpNum";
@@ -274,7 +271,6 @@ static const char *const ENABLE_SINGLE_STREAM = ge::ENABLE_SINGLE_STREAM;
 static const char *const AICORE_NUM = ge::AICORE_NUM.c_str();
 static const char *const FUSION_SWITCH_FILE = ge::FUSION_SWITCH_FILE.c_str();
 static const char *const ENABLE_SMALL_CHANNEL = ge::ENABLE_SMALL_CHANNEL.c_str();
-static const char *const QUANT_OPTIMIZE = ge::QUANT_OPTIMIZE.c_str();
 static const char *const OP_SELECT_IMPL_MODE = ge::OP_SELECT_IMPL_MODE.c_str();
 static const char *const OUTPUT_TYPE = ge::OUTPUT_DATATYPE.c_str();
 static const char *const BUFFER_OPTIMIZE = ge::BUFFER_OPTIMIZE.c_str();
@@ -304,7 +300,6 @@ const std::set<std::string> global_options = {CORE_TYPE,
                                               AICORE_NUM,
                                               FUSION_SWITCH_FILE,
                                               ENABLE_SMALL_CHANNEL,
-                                              QUANT_OPTIMIZE,
                                               OP_SELECT_IMPL_MODE,
                                               OPTYPELIST_FOR_IMPLMODE};
 }  // namespace ir_option
diff --git a/inc/external/graph/operator.h b/inc/external/graph/operator.h
index 1deae7d9..4f837b9d 100644
--- a/inc/external/graph/operator.h
+++ b/inc/external/graph/operator.h
@@ -43,6 +43,7 @@
 #define DYNAMIC_INPUT_TD_NUM(name) ("__dynamic_input_" + name + "_cnt")
 
 namespace ge {
+class Operator;
 class OperatorImpl;
 class NamedAttrs;
 class Graph;
@@ -50,6 +51,7 @@ class AttrValue;
 
 using SubgraphBuilder = std::function<Graph()>;
 using OperatorImplPtr = std::shared_ptr<OperatorImpl>;
+using OperatorPtr = std::shared_ptr<Operator>;
 
 class OpIO;
 using OutHandler = std::shared_ptr<OpIO>;
diff --git a/inc/external/register/register.h b/inc/external/register/register.h
index 28c984bf..a8421511 100644
--- a/inc/external/register/register.h
+++ b/inc/external/register/register.h
@@ -67,6 +67,7 @@ using google::protobuf::Message;
 class OpRegistrationDataImpl;
 
 using ParseParamFunc = std::function<domi::Status(const google::protobuf::Message *, ge::Operator &)>;
+using ParseParamByOpFunc = std::function<domi::Status(const ge::Operator &, ge::Operator &)>;
 using FusionParseParamFunc =
   std::function<domi::Status(const std::vector<const google::protobuf::Message *>, ge::Operator &)>;
 using ParseSubgraphFunc = std::function<Status(const std::string &subgraph_name, const ge::Graph &graph)>;
@@ -85,6 +86,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData {
 
   OpRegistrationData &ParseParamsFn(const ParseParamFunc &parseParamFn);
 
+  OpRegistrationData &ParseParamsByOperatorFn(const ParseParamByOpFunc &parse_param_by_op_fn);
+
   OpRegistrationData &FusionParseParamsFn(const FusionParseParamFunc &fusionParseParamFn);
 
   OpRegistrationData &ParseSubgraphPostFn(const ParseSubgraphFunc &subgraph_post_fn);
@@ -100,6 +103,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistrationData {
   std::set<std::string> GetOriginOpTypeSet() const;
   domi::FrameworkType GetFrameworkType() const;
   ParseParamFunc GetParseParamFn() const;
+  ParseParamByOpFunc GetParseParamByOperatorFn() const;
   FusionParseParamFunc GetFusionParseParamFn() const;
   ParseSubgraphFunc GetParseSubgraphPostFn() const;
 
diff --git a/inc/framework/common/ge_types.h b/inc/framework/common/ge_types.h
index bcc90d25..27ae28ee 100644
--- a/inc/framework/common/ge_types.h
+++ b/inc/framework/common/ge_types.h
@@ -183,6 +183,7 @@ struct ModelData {
   uint32_t model_len = 0;      // Model binary data length
   int32_t priority = 0;        // Model priority
   std::string key;             // Key path for encrypt model, Empty for unencrypt
+  std::string om_name;         // om file name, used for data dump
 };
 
 // The definition of Model information
diff --git a/inc/framework/common/helper/model_helper.h b/inc/framework/common/helper/model_helper.h
index bd9a6c57..3c9de891 100644
--- a/inc/framework/common/helper/model_helper.h
+++ b/inc/framework/common/helper/model_helper.h
@@ -46,6 +46,8 @@ class ModelHelper {
 
   static Status TransModelToGeModel(const ModelPtr& model, GeModelPtr& ge_model);
   static Status TransGeModelToModel(const GeModelPtr& geModelPtr, ModelPtr& modelPtr);
+  Status GetBaseNameFromFileName(const std::string& file_name, std::string& base_name);
+  Status GetModelNameFromMergedGraphName(const std::string& graph_name, std::string& model_name);
 
  private:
   bool is_assign_model_ = false;
diff --git a/inc/framework/executor/ge_executor.h b/inc/framework/executor/ge_executor.h
index 87e30805..91b50311 100644
--- a/inc/framework/executor/ge_executor.h
+++ b/inc/framework/executor/ge_executor.h
@@ -62,7 +62,7 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY GeExecutor {
 
   // Get input and output descriptor
   ge::Status GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
-                              std::vector<ge::TensorDesc> &output_desc);
+                              std::vector<ge::TensorDesc> &output_desc, bool new_model_desc = false);
 
   ///
   /// @ingroup ge
diff --git a/inc/framework/omg/omg.h b/inc/framework/omg/omg.h
index 07d78490..c7dbdd5b 100644
--- a/inc/framework/omg/omg.h
+++ b/inc/framework/omg/omg.h
@@ -98,8 +98,10 @@ Status DumpInfershapeJson(const ge::Graph &graph, const char *json_file);
 
 Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const std::string &output_format);
 
-Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
-                     std::vector<std::string> &output_nodes_name);
+Status GetOutputLeaf(ge::NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info);
+
+void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
+                                std::vector<std::string> &output_nodes_name);
 
 void UpdateOmgCtxWithParserCtx();
 
diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h
index 8e5bc484..70d59c2f 100644
--- a/inc/framework/omg/omg_inner_types.h
+++ b/inc/framework/omg/omg_inner_types.h
@@ -94,6 +94,8 @@ struct OmgContext {
   std::vector<std::pair<std::string, int32_t>> user_out_nodes;
   // net out nodes (where user_out_nodes or leaf nodes)
   std::vector<std::string> net_out_nodes;
+  // net out nodes top names(only caffe has top)
+  std::vector<std::string> out_top_names;
   // path for the aicpu custom operator so_file
   std::vector<std::string> aicpu_op_run_paths;
   // ddk version
diff --git a/inc/graph/debug/ge_attr_define.h b/inc/graph/debug/ge_attr_define.h
index 873952e1..5db047c0 100644
--- a/inc/graph/debug/ge_attr_define.h
+++ b/inc/graph/debug/ge_attr_define.h
@@ -139,6 +139,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string NEW_AIPP
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_INPUTS;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_AIPP_OUTPUTS;
 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_DIMS;
+
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_SESSION_GRAPH_ID;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_PARENT_GRAPH_NAME;
 
@@ -181,6 +183,8 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_STREAM_CYCLE_EVENT_FLAG;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS;
 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_ORIGIN_SIZE;
+
 // to be deleted
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_TO_BE_DELETED;
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string PERMUTE_RESHAPE_FUSION;
diff --git a/inc/graph/shape_refiner.h b/inc/graph/shape_refiner.h
index 65664615..4f8783a3 100644
--- a/inc/graph/shape_refiner.h
+++ b/inc/graph/shape_refiner.h
@@ -31,6 +31,7 @@ class ShapeRefiner {
   static graphStatus InferShapeAndType(const NodePtr &node, bool before_subgraph);
   static graphStatus InferShapeAndType(const NodePtr &node);
   static graphStatus InferShapeAndType(const ConstNodePtr &node, Operator &op);
+  static void ClearContextMap();
 
  private:
   static void PrintInOutTensorShape(const ge::NodePtr &node, const std::string &phase);
diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc
index 1c2c9c71..96638249 100644
--- a/src/common/graph/ge_attr_define.cc
+++ b/src/common/graph/ge_attr_define.cc
@@ -121,6 +121,8 @@ const std::string NEW_AIPP_CONV_OP = "new_conv_op_for_aipp";
 const std::string ATTR_NAME_AIPP_INPUTS = "_aipp_inputs";
 const std::string ATTR_NAME_AIPP_OUTPUTS = "_aipp_outputs";
 
+const std::string ATTR_NAME_INPUT_DIMS = "input_dims";
+
 const std::string ATTR_NAME_SESSION_GRAPH_ID = "_session_graph_id";
 const std::string ATTR_NAME_PARENT_GRAPH_NAME = "_parent_graph_name";
 
@@ -154,6 +156,7 @@ const std::string ATTR_NAME_RTSWITCH_RECV_EVENT_ID = "rtswitch_event_id";
 const std::string ATTR_NAME_AUTOMIC_ADD_START = "automic_add_addr_start";
 const std::string ATTR_NAME_AUTOMIC_ADD_MEM_SIZE = "automic_add_mem_size";
 const std::string ATTR_NAME_DYNAMIC_OUTPUT_DIMS = "_dynamic_output_dims";
+const std::string ATTR_NAME_INPUT_ORIGIN_SIZE = "input_origin_size";
 
 // To be deleted
 const std::string ATTR_TO_BE_DELETED = "to_be_deleted";
diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk
index 744d1725..5eaf7d86 100644
--- a/src/common/graph/graph.mk
+++ b/src/common/graph/graph.mk
@@ -1,5 +1,5 @@
 LOCAL_PATH := $(call my-dir)
-
+include $(LOCAL_PATH)/stub/Makefile
 COMMON_LOCAL_SRC_FILES := \
     ./proto/om.proto \
     ./proto/ge_ir.proto \
@@ -85,6 +85,29 @@ LOCAL_PROPRIETARY_MODULE := true
 
 include $(BUILD_HOST_SHARED_LIBRARY)
 
+#compiler for host
+include $(CLEAR_VARS)
+LOCAL_MODULE := stub/libgraph
+
+LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2
+LOCAL_CPPFLAGS += -fexceptions
+
+LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
+LOCAL_SRC_FILES  := \
+    ../../out/atc/lib64/stub/graph.cc \
+    ../../out/atc/lib64/stub/operator.cc \
+    ../../out/atc/lib64/stub/tensor.cc \
+    ../../out/atc/lib64/stub/operator_factory.cc \
+
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_LDFLAGS := -lrt -ldl
+
+LOCAL_MULTILIB := 64
+LOCAL_PROPRIETARY_MODULE := true
+
+include $(BUILD_HOST_SHARED_LIBRARY)
 
 #compiler for device
 include $(CLEAR_VARS)
@@ -111,6 +134,32 @@ LOCAL_PROPRIETARY_MODULE := true
 
 include $(BUILD_SHARED_LIBRARY)
 
+#compiler for device
+include $(CLEAR_VARS)
+LOCAL_MODULE := stub/libgraph
+
+LOCAL_CFLAGS += -O2
+
+LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
+LOCAL_SRC_FILES  := \
+    ../../out/atc/lib64/stub/graph.cc \
+    ../../out/atc/lib64/stub/operator.cc \
+    ../../out/atc/lib64/stub/tensor.cc \
+    ../../out/atc/lib64/stub/operator_factory.cc \
+
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_LDFLAGS := -lrt -ldl
+
+ifeq ($(device_os),android)
+LOCAL_LDFLAGS := -ldl
+endif
+
+LOCAL_MULTILIB := 64
+LOCAL_PROPRIETARY_MODULE := true
+
+include $(BUILD_SHARED_LIBRARY)
 
 # compile for ut/st
 include $(CLEAR_VARS)
diff --git a/src/common/graph/node.cc b/src/common/graph/node.cc
index 1c8f327b..e0939e7e 100644
--- a/src/common/graph/node.cc
+++ b/src/common/graph/node.cc
@@ -759,6 +759,7 @@ graphStatus Node::Verify() const {
         GELOGW("Verify UpdateOutputName failed");
       }
     }
+    node_op.BreakConnect();
   }
 
   if (op_->CommonVerify() == GRAPH_SUCCESS) {
diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc
index ba3c9b33..adb52162 100644
--- a/src/common/graph/op_desc.cc
+++ b/src/common/graph/op_desc.cc
@@ -818,7 +818,9 @@ graphStatus OpDesc::InferShapeAndType() {
     }
   }
   Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this());
-  return (graphStatus)infer_func_(op_proxy);
+  graphStatus ret = (graphStatus)infer_func_(op_proxy);
+  op_proxy.BreakConnect();
+  return ret;
 }
 
 graphStatus OpDesc::DefaultInferFormat() {
@@ -863,12 +865,14 @@ graphStatus OpDesc::DefaultInferFormat() {
 }
 
 graphStatus OpDesc::OpVerify() {
-  Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this());
   if (verifier_func_ == nullptr) {
     verifier_func_ = OperatorFactoryImpl::GetVerifyFunc(GetType());
   }
   if (verifier_func_ != nullptr) {
-    return (graphStatus)verifier_func_(op_proxy);
+    Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this());
+    graphStatus ret = (graphStatus)verifier_func_(op_proxy);
+    op_proxy.BreakConnect();
+    return ret;
   }
   return GRAPH_SUCCESS;
 }
diff --git a/src/common/graph/operator.cc b/src/common/graph/operator.cc
index 8adf56c1..608fafb6 100644
--- a/src/common/graph/operator.cc
+++ b/src/common/graph/operator.cc
@@ -21,7 +21,7 @@
 #include <mutex>
 #include <queue>
 #include <set>
-#include "array_ops.h"
+#include "./array_ops.h"
 #include "debug/ge_log.h"
 #include "debug/ge_op_types.h"
 #include "debug/ge_util.h"
@@ -931,7 +931,7 @@ OperatorImplPtr Operator::GetOperatorImplPtr() const { return operator_impl_; }
 
 void Operator::BreakConnect() const {
   if (operator_impl_ == nullptr) {
-    GELOGE(GRAPH_FAILED, "operator impl is nullptr.");
+    GELOGW("operator impl is nullptr.");
     return;
   }
   operator_impl_->ClearInputLinks();
@@ -1318,6 +1318,8 @@ class GraphBuilderImpl {
       string type = src_op_impl->op_desc_->GetType();
       auto node_op = ge::OperatorFactory::CreateOperator("node_op", type);
       auto tensor_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op);
+      node_op.BreakConnect();
+
       GE_CHK_BOOL_EXEC(tensor_desc != nullptr, continue, "tensor_desc is null.");
       if ((tensor_desc->GetInputsSize() == 0 && tensor_desc->GetOutputsSize() > 0) || type == DATA ||
           type == VARIABLE || type == INITDATA || type == GETNEXT) {
@@ -1542,6 +1544,7 @@ void GraphUtils::BreakConnect(const std::map<OperatorImplPtr, NodePtr> &all_node
     }
     op_impl->ClearOutputLinks();
     op_impl->ClearInputLinks();
+    OperatorKeeper::GetInstance().CheckOutOperator(op_impl);
   }
 }
 }  // namespace ge
diff --git a/src/common/graph/shape_refiner.cc b/src/common/graph/shape_refiner.cc
index 845fe494..833ca868 100644
--- a/src/common/graph/shape_refiner.cc
+++ b/src/common/graph/shape_refiner.cc
@@ -235,6 +235,7 @@ graphStatus ShapeRefiner::InferShapeAndType(const ConstNodePtr &node, Operator &
 
     GELOGD("get op from OperatorFactory success. opType: %s", op_type.c_str());
     auto temp_op_desc = ge::OpDescUtils::GetOpDescFromOperator(node_op);
+    node_op.BreakConnect();
     if (temp_op_desc == nullptr) {
       GELOGE(GRAPH_FAILED, "temp op desc is null");
       return GRAPH_FAILED;
@@ -328,6 +329,9 @@ InferenceContextPtr CreateInferenceContext(const std::unordered_map<NodePtr, Inf
 namespace {
 std::unordered_map<NodePtr, InferenceContextPtr> context_map;
 }
+
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY void ShapeRefiner::ClearContextMap() { context_map.clear(); }
+
 GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ShapeRefiner::InferShapeAndType(const NodePtr &node) {
   return InferShapeAndType(node, true);
 }
diff --git a/src/common/graph/stub/Makefile b/src/common/graph/stub/Makefile
new file mode 100644
index 00000000..832adcd5
--- /dev/null
+++ b/src/common/graph/stub/Makefile
@@ -0,0 +1,6 @@
+inc_path := $(shell pwd)/inc/external/
+out_path := $(shell pwd)/out/atc/lib64/stub/
+stub_path := $(shell pwd)/common/graph/stub/
+
+mkdir_stub := $(shell mkdir -p $(out_path))
+graph_local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path))
diff --git a/src/common/graph/stub/gen_stubapi.py b/src/common/graph/stub/gen_stubapi.py
new file mode 100644
index 00000000..6185c479
--- /dev/null
+++ b/src/common/graph/stub/gen_stubapi.py
@@ -0,0 +1,573 @@
+import os
+import re
+import sys
+import logging
+
+logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s',
+                    level=logging.INFO)
+
+"""
+    this attr is used for symbol table visible
+"""
+GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
+
+"""
+    generate stub func body by return type
+"""
+RETURN_STATEMENTS = {
+    'graphStatus': '    return GRAPH_SUCCESS;',
+    'Status': '    return SUCCESS;',
+    'Graph': '    return Graph();',
+    'Graph&': '    return *this;',
+    'Format': '    return Format();',
+    'Format&': '    return *this;',
+    'Shape': '    return Shape();',
+    'Shape&': '    return *this;',
+    'TensorDesc': '    return TensorDesc();',
+    'TensorDesc&': '    return *this;',
+    'Tensor': '    return Tensor();',
+    'Tensor&': '    return *this;',
+    'Operator': '    return Operator();',
+    'Operator&': '    return *this;',
+    'Ptr': '    return nullptr;',
+    'std::string': '    return "";',
+    'std::string&': '    return "";',
+    'string': ' return "";',
+    'int': '    return 0;',
+    'DataType': '    return DT_FLOAT;',
+    'InferenceContextPtr': '    return nullptr;',
+    'SubgraphBuilder': '    return nullptr;',
+    'OperatorImplPtr': '    return nullptr;',
+    'OutHandler': '    return nullptr;',
+    'std::vector<std::string>': '    return {};',
+    'std::vector<int64_t>': '    return {};',
+    'std::map': '    return {};',
+    'uint32_t': '    return 0;',
+    'int64_t': '    return 0;',
+    'uint64_t': '    return 0;',
+    'size_t': '    return 0;',
+    'float': '    return 0.0f;',
+    'bool': '    return false;',
+}
+
+"""
+    max code len per line in hua_wei software programming specifications
+"""
+max_code_len_per_line = 100
+
+"""
+    white_list_for_debug, include_dir_key_words is to
+    determines which header files to generate cc files from
+    when DEBUG on
+"""
+white_list_for_debug = ["operator.h", "tensor.h",
+                        "graph.h", "operator_factory.h",
+                        "ge_ir_build.h"]
+include_dir_key_words = ["ge", "graph"]
+DEBUG = True
+
+
+def need_generate_func(func_line):
+    """
+    :param func_line:
+    :return:
+    """
+    if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \
+            or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"):
+        return False
+    return True
+
+
+def file_endswith_white_list_suffix(file):
+    """
+    :param file:
+    :return:
+    """
+    if DEBUG:
+        for suffix in white_list_for_debug:
+            if file.endswith(suffix):
+                return True
+        return False
+    else:
+        return True
+
+
+"""
+    belows are patterns used for analyse .h file
+"""
+# pattern function
+pattern_func = re.compile(r"""(^[\s]*)          #leading with space,we will find and delete after
+([a-zA-Z~_]            # void int likely
+.*
+[)]                     #we find )
+(?!.*{)                 # we do not want the case int abc() const { return 1;}
+.*)
+(;.*)                   #we want to find ; and after for we will replace these later
+\n$
+""", re.VERBOSE | re.MULTILINE | re.DOTALL)
+
+# pattern comment
+pattern_comment = re.compile(r'^\s*//')
+pattern_comment_2_start = re.compile(r'^\s*/[*]')
+pattern_comment_2_end = re.compile(r'[*]/\s*$')
+# pattern define
+pattern_define = re.compile(r'^\s*#define')
+pattern_define_return = re.compile(r'\\\s*$')
+# blank line
+pattern_blank_line = re.compile(r'^\s*$')
+# virtual,explicit,friend,static
+pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)')
+# lead space
+pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]')
+# functions will have patterns such as func ( or func(
+# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist
+# format like :"operator = ()"
+pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]')
+# template
+pattern_template = re.compile(r'^\s*template')
+pattern_template_end = re.compile(r'>\s*$')
+# namespace
+pattern_namespace = re.compile(r'namespace.*{')
+# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with
+pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR)
+# {}
+pattern_start = re.compile('{')
+pattern_end = re.compile('}')
+
+line_index = 0
+
+
+class H2CC(object):
+    def __init__(self, input_file, output_file, shared_includes_content):
+        """
+        :param input_file:
+        :param output_file:
+        :param shared_includes_content:
+        """
+        self.input_file = input_file
+        self.output_file = output_file
+        self.shared_includes_content = shared_includes_content
+        self.line_index = 0
+        self.input_fd = open(self.input_file, 'r')
+        self.input_content = self.input_fd.readlines()
+        self.output_fd = open(self.output_file, 'w')
+
+        # The state may be normal_now(in the middle of {}),class_now,namespace_now
+        self.stack = []
+        self.stack_class = []
+        self.stack_template = []
+        # record funcs generated by h2cc func
+        self.func_list_exist = []
+
+    def __del__(self):
+        self.input_fd.close()
+        self.output_fd.close()
+        del self.stack
+        del self.stack_class
+        del self.stack_template
+        del self.func_list_exist
+
+    def just_skip(self):
+        # skip blank line or comment
+        if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search(
+                self.input_content[self.line_index]):  # /n or comment using //
+            self.line_index += 1
+        if pattern_comment_2_start.search(self.input_content[self.line_index]):  # comment using /*
+            while not pattern_comment_2_end.search(self.input_content[self.line_index]):  # */
+                self.line_index += 1
+            self.line_index += 1
+        # skip define
+        if pattern_define.search(self.input_content[self.line_index]):
+            while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search(
+                    self.input_content[self.line_index]):
+                self.line_index += 1
+            self.line_index += 1
+
+    def write_inc_content(self):
+        for shared_include_content in self.shared_includes_content:
+            self.output_fd.write(shared_include_content)
+
+    def h2cc(self):
+        """
+        :return:
+        """
+        logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file)
+        global pattern_comment
+        global pattern_comment_2_start
+        global pattern_comment_2_end
+        global pattern_blank_line
+        global pattern_func
+        global pattern_keyword
+        global pattern_leading_space
+        global pattern_func_name
+        global pattern_template
+        global pattern_template_end
+        global pattern_namespace
+        global pattern_class
+        global pattern_start
+        global pattern_end
+        global line_index
+        # write inc content
+        self.write_inc_content()
+        # core processing cycle, process the input .h file by line
+        while self.line_index < len(self.input_content):
+            # handle comment and blank line
+            self.just_skip()
+
+            # match namespace
+            self.handle_namespace()
+
+            # match template
+            template_string = self.handle_template()
+            # match class
+            line = self.input_content[self.line_index]
+            match_class = pattern_class.search(line)
+            match_start = pattern_start.search(line)
+            handle_class_result = self.handle_class(template_string, line, match_start, match_class)
+            if handle_class_result == "continue":
+                continue
+
+            # match "}"
+            handle_stack_result = self.handle_stack(match_start)
+            if handle_stack_result == "continue":
+                continue
+            # handle func
+            handle_func1_result, line, start_i = self.handle_func1(line)
+            if handle_func1_result == "continue":
+                continue
+
+            # here means func is found
+            # delete key word
+            line = pattern_keyword.sub('', line)
+            logging.info("line[%s]", line)
+
+            # Class member function
+            # if friend we will not add class name
+            friend_match = re.search('friend ', line)
+            if len(self.stack_class) > 0 and not friend_match:
+                line, func_name = self.handle_class_member_func(line, template_string)
+            # Normal functions
+            else:
+                line, func_name = self.handle_normal_func(line, template_string)
+
+            need_generate = need_generate_func(line)
+            # func body
+            line += self.implement_function(line)
+            # comment
+            line = self.gen_comment(start_i) + line
+            # write to out file
+            self.write_func_content(line, func_name, need_generate)
+            # next loop
+            self.line_index += 1
+
+        logging.info('Added %s functions', len(self.func_list_exist))
+        logging.info('Successfully converted,please see ' + self.output_file)
+
+    def handle_func1(self, line):
+        """
+        :param line:
+        :return:
+        """
+        find1 = re.search('[(]', line)
+        if not find1:
+            self.line_index += 1
+            return "continue", line, None
+        find2 = re.search('[)]', line)
+        start_i = self.line_index
+        space_match = pattern_leading_space.search(line)
+        # deal with
+        # int abc(int a,
+        #        int b)
+        if find1 and (not find2):
+            self.line_index += 1
+            line2 = self.input_content[self.line_index]
+            if space_match:
+                line2 = re.sub('^' + space_match.group(1), '', line2)
+            line += line2
+            while self.line_index < len(self.input_content) and (not re.search('[)]', line2)):
+                self.line_index += 1
+                line2 = self.input_content[self.line_index]
+                line2 = re.sub('^' + space_match.group(1), '', line2)
+                line += line2
+
+        match_start = pattern_start.search(self.input_content[self.line_index])
+        match_end = pattern_end.search(self.input_content[self.line_index])
+        if match_start:  # like  ) {  or ) {}    int the last line
+            if not match_end:
+                self.stack.append('normal_now')
+            ii = start_i
+            while ii <= self.line_index:
+                ii += 1
+            self.line_index += 1
+            return "continue", line, start_i
+        logging.info("line[%s]", line)
+        # '  int abc();'->'int abc()'
+        (line, match) = pattern_func.subn(r'\2\n', line)
+        logging.info("line[%s]", line)
+        # deal with case:
+        # 'int \n abc(int a, int b)'
+        if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]):
+            line = self.input_content[start_i - 1] + line
+        line = line.lstrip()
+        if not match:
+            self.line_index += 1
+            return "continue", line, start_i
+        return "pass", line, start_i
+
+    def handle_stack(self, match_start):
+        """
+        :param match_start:
+        :return:
+        """
+        line = self.input_content[self.line_index]
+        match_end = pattern_end.search(line)
+        if match_start:
+            self.stack.append('normal_now')
+        if match_end:
+            top_status = self.stack.pop()
+            if top_status == 'namespace_now':
+                self.output_fd.write(line + '\n')
+            elif top_status == 'class_now':
+                self.stack_class.pop()
+                self.stack_template.pop()
+        if match_start or match_end:
+            self.line_index += 1
+            return "continue"
+
+        if len(self.stack) > 0 and self.stack[-1] == 'normal_now':
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_class(self, template_string, line, match_start, match_class):
+        """
+        :param template_string:
+        :param line:
+        :param match_start:
+        :param match_class:
+        :return:
+        """
+        if match_class:  # we face a class
+            self.stack_template.append(template_string)
+            self.stack.append('class_now')
+            class_name = match_class.group(3)
+
+            # class template specializations: class A<u,Node<u> >
+            if '<' in class_name:
+                k = line.index('<')
+                fit = 1
+                for ii in range(k + 1, len(line)):
+                    if line[ii] == '<':
+                        fit += 1
+                    if line[ii] == '>':
+                        fit -= 1
+                    if fit == 0:
+                        break
+                class_name += line[k + 1:ii + 1]
+            logging.info('class_name[%s]', class_name)
+            self.stack_class.append(class_name)
+            while not match_start:
+                self.line_index += 1
+                line = self.input_content[self.line_index]
+                match_start = pattern_start.search(line)
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_template(self):
+        line = self.input_content[self.line_index]
+        match_template = pattern_template.search(line)
+        template_string = ''
+        if match_template:
+            match_template_end = pattern_template_end.search(line)
+            template_string = line
+            while not match_template_end:
+                self.line_index += 1
+                line = self.input_content[self.line_index]
+                template_string += line
+                match_template_end = pattern_template_end.search(line)
+            self.line_index += 1
+        return template_string
+
+    def handle_namespace(self):
+        line = self.input_content[self.line_index]
+        match_namespace = pattern_namespace.search(line)
+        if match_namespace:  # we face namespace
+            self.output_fd.write(line + '\n')
+            self.stack.append('namespace_now')
+            self.line_index += 1
+
+    def handle_normal_func(self, line, template_string):
+        template_line = ''
+        self.stack_template.append(template_string)
+        if self.stack_template[-1] != '':
+            template_line = re.sub(r'\s*template', 'template', self.stack_template[-1])
+            # change '< class T = a, class U = A(3)>' to '<class T, class U>'
+            template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
+            template_line = re.sub(r'\s*=.*,', ',', template_line)
+            template_line = re.sub(r'\s*=.*', '', template_line)
+        line = re.sub(r'\s*=.*,', ',', line)
+        line = re.sub(r'\s*=.*\)', ')', line)
+        line = template_line + line
+        self.stack_template.pop()
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def handle_class_member_func(self, line, template_string):
+        template_line = ''
+        x = ''
+        if template_string != '':
+            template_string = re.sub(r'\s*template', 'template', template_string)
+            template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string)
+            template_string = re.sub(r'\s*=.*,', ',', template_string)
+            template_string = re.sub(r'\s*=.*', '', template_string)
+        if self.stack_template[-1] != '':
+            if not (re.search(r'<\s*>', stack_template[-1])):
+                template_line = re.sub(r'^\s*template', 'template', stack_template[-1])
+                if not (re.search(r'<.*>', self.stack_class[-1])):
+                    # for x we get like template<class T, typename U> -> <T,U>
+                    x = re.sub(r'template\s*<', '<', template_line)  # remove template -> <class T, typename U>
+                    x = re.sub(r'\n', '', x)
+                    x = re.sub(r'\s*=.*,', ',', x)
+                    x = re.sub(r'\s*=.*\>', '>', x)
+                    x = x.rstrip()  # remove \n
+                    x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '',
+                               x)  # remove class,typename ->  <T, U>
+                    x = re.sub(r'<\s+', '<', x)
+                    x = re.sub(r'\s+>', '>', x)
+                    x = re.sub(r'\s+,', ',', x)
+                    x = re.sub(r',\s+', ', ', x)
+        line = re.sub(r'\s*=\s+0', '', line)
+        line = re.sub(r'\s*=\s+.*,', ',', line)
+        line = re.sub(r'\s*=\s+.*\)', ')', line)
+        logging.info("x[%s]\nline[%s]", x, line)
+        # if the function is long, void ABC::foo()
+        # breaks into two lines void ABC::\n foo()
+        temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1)
+        if len(temp_line) > max_code_len_per_line:
+            line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1)
+        else:
+            line = temp_line
+        logging.info("line[%s]", line)
+        # add template as the above if there is one
+        template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
+        template_line = re.sub(r'\s*=.*,', ',', template_line)
+        template_line = re.sub(r'\s*=.*', '', template_line)
+        line = template_line + template_string + line
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def write_func_content(self, content, func_name, need_generate):
+        if not (func_name in self.func_list_exist) and need_generate:
+            self.output_fd.write(content)
+            self.func_list_exist.append(func_name)
+            logging.info('add func:[%s]', func_name)
+
+    def gen_comment(self, start_i):
+        comment_line = ''
+        # Function comments are on top of function declarations, copy them over
+        k = start_i - 1  # one line before this func start
+        if pattern_template.search(self.input_content[k]):
+            k -= 1
+        if pattern_comment_2_end.search(self.input_content[k]):
+            comment_line = self.input_content[k].lstrip()
+            while not pattern_comment_2_start.search(self.input_content[k]):
+                k -= 1
+                comment_line = self.input_content[k].lstrip() + comment_line
+        else:
+            for j in range(k, 0, -1):
+                c_line = self.input_content[j]
+                if pattern_comment.search(c_line):
+                    c_line = re.sub(r'\s*//', '//', c_line)
+                    comment_line = c_line + comment_line
+                else:
+                    break
+        return comment_line
+
+    @staticmethod
+    def implement_function(func):
+        function_def = ''
+        function_def += '{\n'
+
+        all_items = func.split()
+        start = 0
+        return_type = all_items[start]
+        if return_type == "const":
+            start += 1
+            return_type = all_items[start]
+        if return_type.startswith(('std::map', 'std::set', 'std::vector')):
+            return_type = "std::map"
+        if return_type.endswith('*') or (len(all_items) > start + 1 and all_items[start + 1].startswith('*')):
+            return_type = "Ptr"
+        if len(all_items) > start + 1 and all_items[start + 1].startswith('&'):
+            return_type += "&"
+        if RETURN_STATEMENTS.__contains__(return_type):
+            function_def += RETURN_STATEMENTS[return_type]
+        else:
+            logging.warning("Unhandled return type[%s]", return_type)
+
+        function_def += '\n'
+        function_def += '}\n'
+        function_def += '\n'
+        return function_def
+
+
+def collect_header_files(path):
+    """
+    :param path:
+    :return:
+    """
+    header_files = []
+    shared_includes_content = []
+    for root, dirs, files in os.walk(path):
+        files.sort()
+        for file in files:
+            if file.find("git") >= 0:
+                continue
+            if not file.endswith('.h'):
+                continue
+            file_path = os.path.join(root, file)
+            file_path = file_path.replace('\\', '/')
+            header_files.append(file_path)
+            include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:])
+            shared_includes_content.append(include_str)
+    return header_files, shared_includes_content
+
+
+def generate_stub_file(inc_dir, out_cc_dir):
+    """
+    :param inc_dir:
+    :param out_cc_dir:
+    :return:
+    """
+    target_header_files, shared_includes_content = collect_header_files(inc_dir)
+    for header_file in target_header_files:
+        if not file_endswith_white_list_suffix(header_file):
+            continue
+        cc_file = re.sub('.h*$', '.cc', header_file)
+        h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content)
+        h_2_cc.h2cc()
+
+
+def gen_code(inc_dir, out_cc_dir):
+    """
+    :param inc_dir:
+    :param out_cc_dir:
+    :return:
+    """
+    if not inc_dir.endswith('/'):
+        inc_dir += '/'
+    if not out_cc_dir.endswith('/'):
+        out_cc_dir += '/'
+    for include_dir_key_word in include_dir_key_words:
+        generate_stub_file(inc_dir + include_dir_key_word, out_cc_dir)
+
+
+if __name__ == '__main__':
+    inc_dir = sys.argv[1]
+    out_cc_dir = sys.argv[2]
+    gen_code(inc_dir, out_cc_dir)
diff --git a/src/ge/common/ge/tbe_plugin_manager.cc b/src/ge/common/ge/tbe_plugin_manager.cc
index cdce243c..e02b9422 100644
--- a/src/ge/common/ge/tbe_plugin_manager.cc
+++ b/src/ge/common/ge/tbe_plugin_manager.cc
@@ -187,12 +187,9 @@ void TBEPluginManager::LoadCustomOpLib() {
   std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
   GELOGI("The size of registration_datas is: %zu", registration_datas.size());
   for (OpRegistrationData reg_data : registration_datas) {
-    bool ret = CheckRegisterStatus(reg_data);
-    if (ret) {
-      GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(),
-             static_cast<uint32_t>(reg_data.GetImplyType()));
-      domi::OpRegistry::Instance()->Register(reg_data);
-    }
+    GELOGD("Begin to register optype: %s, imply_type: %u", reg_data.GetOmOptype().c_str(),
+           static_cast<uint32_t>(reg_data.GetImplyType()));
+    domi::OpRegistry::Instance()->Register(reg_data);
   }
 }
 
@@ -230,31 +227,6 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void TBEPluginManager::LoadPlug
   }
 }
 
-bool TBEPluginManager::CheckRegisterStatus(const OpRegistrationData &reg_data) {
-  bool ret = true;
-  static char *parser_priority = std::getenv("PARSER_PRIORITY");
-  static bool keep_cce = parser_priority != nullptr && string(parser_priority) == "cce";
-  auto ori_optype_set = reg_data.GetOriginOpTypeSet();
-  for (const auto &op_type : ori_optype_set) {
-    domi::ImplyType imply_type = domi::OpRegistry::Instance()->GetImplyTypeByOriOpType(op_type);
-    GELOGD("Enter into reg_data loop. op_type = %s , om_optype_ = %s", op_type.c_str(), reg_data.GetOmOptype().c_str());
-    if (imply_type != domi::ImplyType::BUILDIN) {
-      if ((keep_cce && reg_data.GetImplyType() != domi::ImplyType::CCE) ||
-          (!keep_cce && reg_data.GetImplyType() != domi::ImplyType::TVM)) {
-        GELOGD("op_type[%s] does not need to be changed, om_optype:%s.", op_type.c_str(),
-               reg_data.GetOmOptype().c_str());
-        ret = false;
-      } else {
-        GELOGI("op_type[%s] will be changed to om_optype:%s.", op_type.c_str(), reg_data.GetOmOptype().c_str());
-      }
-    } else {
-      GELOGD("First register in ge initialize, original type: %s, om_optype: %s, imply type: %d.", op_type.c_str(),
-             reg_data.GetOmOptype().c_str(), static_cast<int>(reg_data.GetImplyType()));
-    }
-  }
-  return ret;
-}
-
 Status TBEPluginManager::CheckCustomAiCpuOpLib() {
   std::vector<std::string> vec_op_type;
 
diff --git a/src/ge/common/ge/tbe_plugin_manager.h b/src/ge/common/ge/tbe_plugin_manager.h
index c2ad99b1..82264ae8 100644
--- a/src/ge/common/ge/tbe_plugin_manager.h
+++ b/src/ge/common/ge/tbe_plugin_manager.h
@@ -63,7 +63,6 @@ class TBEPluginManager {
   static void GetCustomOpPath(std::string &customop_path);
   void LoadCustomOpLib();
   static Status CheckCustomAiCpuOpLib();
-  static bool CheckRegisterStatus(const OpRegistrationData &reg_data);
 
   SoHandlesVec handles_vec_;
   static std::map<string, string> options_;
diff --git a/src/ge/common/helper/model_helper.cc b/src/ge/common/helper/model_helper.cc
index 556b43e7..2f95cbb1 100644
--- a/src/ge/common/helper/model_helper.cc
+++ b/src/ge/common/helper/model_helper.cc
@@ -184,7 +184,8 @@ ModelHelper::SaveOriginalGraphToOmModel(const ge::Graph &graph, const std::strin
   // Model
   ModelPtr model_ptr = ge::MakeShared<ge::Model>();
   GE_CHECK_NOTNULL_EXEC(model_ptr, return MEMALLOC_FAILED);
-  model_ptr->SetName(compute_graph->GetName());
+  std::string original_model_name = compute_graph->GetName() + "_original";
+  model_ptr->SetName(original_model_name);
   model_ptr->SetGraph(graph);
   model_ptr->SetVersion(static_cast<uint32_t>(OM_PROTO_VERSION));
   string framework_version;
@@ -504,4 +505,36 @@ Status ModelHelper::ReleaseLocalModelData() noexcept {
   }
   return result;
 }
+
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelHelper::GetBaseNameFromFileName(const string &file_name,
+                                                                                             string &base_name) {
+  GELOGD("Get base_name from file, file_name:%s", file_name.c_str());
+  GE_CHK_BOOL_EXEC_WARN(!file_name.empty(), return FAILED, "File path may not valid, check params --output");
+  size_t start_position = 0;
+  // using output as base_name (ignore ".om")
+  size_t filename_suffixes = 3;
+  if (file_name.find_last_of('/') != string::npos) {
+    start_position = file_name.find_last_of('/') + 1;
+  }
+  size_t end_position = file_name.length() - filename_suffixes;
+  base_name = file_name.substr(start_position, end_position - start_position);
+  GE_CHK_BOOL_EXEC_WARN(!base_name.empty(), return FAILED, "Get base_name failed, check params --output");
+  return SUCCESS;
+}
+
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status
+ModelHelper::GetModelNameFromMergedGraphName(const string &graph_name, string &model_name) {
+  GELOGD("Get model_name from graph_name, graph_name:%s", graph_name.c_str());
+  // this can only be used after merged graph(graph name will be append with "_x", x is index);
+  GE_CHK_BOOL_EXEC_WARN(!graph_name.empty(), return FAILED, "File path may not valid, check params --output");
+  size_t start_position = 0;
+  size_t end_position = graph_name.length();
+  // using graph as model_name (ignore "_x", x is the index of graph)
+  if (graph_name.find_last_of('_') != string::npos) {
+    end_position = graph_name.find_last_of('_');
+  }
+  model_name = graph_name.substr(start_position, end_position);
+  GE_CHK_BOOL_EXEC_WARN(!model_name.empty(), return FAILED, "Get model_name failed, check params --output");
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/src/ge/common/model_parser/base.cc b/src/ge/common/model_parser/base.cc
index a9a21ec5..fb6a647f 100644
--- a/src/ge/common/model_parser/base.cc
+++ b/src/ge/common/model_parser/base.cc
@@ -15,7 +15,7 @@
  */
 
 #include "common/model_parser/base.h"
-
+#include "common/helper/model_helper.h"
 #include <securec.h>
 #include <sys/sysinfo.h>
 #include <fstream>
@@ -61,7 +61,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status ModelParserBase::LoadFro
 
   // read data as a block:
   (void)fs.read(data, len);
-
+  ModelHelper model_helper;
+  model_helper.GetBaseNameFromFileName(model_path, model_data.om_name);
   // Set the model data parameter
   model_data.model_data = data;
   model_data.model_len = len;
diff --git a/src/ge/common/profiling/profiling_manager.cc b/src/ge/common/profiling/profiling_manager.cc
index 748b9880..ecbbf5f2 100644
--- a/src/ge/common/profiling/profiling_manager.cc
+++ b/src/ge/common/profiling/profiling_manager.cc
@@ -16,15 +16,12 @@
 
 #include "common/profiling/profiling_manager.h"
 
-#include <nlohmann/json.hpp>
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/debug/log.h"
 #include "framework/common/string_util.h"
 #include "graph/ge_context.h"
 #include "runtime/base.h"
 
-using Json = nlohmann::json;
-
 namespace {
 const char *const kJobID = "jobID";
 const char *const kDeviceID = "deviceID";
@@ -35,6 +32,7 @@ const char *const kEvents = "events";
 const char *const kAiCoreEvents = "ai_core_events";
 const char *const kName = "name";
 const char *const kTraceID = "traceId";
+const char *const kProfDir = "resultPath";
 const size_t kReportMaxLen = 2048;
 }  // namespace
 
@@ -100,6 +98,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
     Json start_prof_conf = Json::parse(config);
     Json &prof_conf = start_prof_conf[kStartCfg][0];
     job_id_ = prof_conf[kJobID];
+    auto iter = prof_conf.find(kProfDir);
+    if (iter != prof_conf.end()) {
+      prof_dir_ = prof_conf[kProfDir];
+    }
     Json &device_id = prof_conf[kDeviceID];
     if (device_id.size() != 0) {
       vector<int32_t>().swap(device_id_);
@@ -126,23 +128,36 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
       }
     }
 
-    GELOGI("Profiling json config from acl:%s", config.c_str());
     Json &features = prof_conf[kFeatures];
+    if (ParseFeaturesFromAclCfg(features) != SUCCESS) {
+      GELOGE(FAILED, "Parse feature from acl cfg failed.");
+      return FAILED;
+    }
+    is_profiling_ = true;
+  } catch (...) {
+    GELOGE(FAILED, "Json conf is not invalid !");
+    return ge::PARAM_INVALID;
+  }
+#endif
+  return ge::SUCCESS;
+}
+
+FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::ParseFeaturesFromAclCfg(
+  const Json &features) {
+#ifdef DAVINCI_SUPPORT_PROFILING
+  try {
     for (size_t i = 0; i < features.size(); ++i) {
-      Json &feature = features[i];
+      const Json &feature = features[i];
       if ((feature.find(kName) == feature.end()) || feature[kName].is_null()) {
         continue;
       }
-
       const std::string &name = feature[kName];
       if (name == "op_trace") {
-        GELOGI("Op trace config from acl");
-        Json &conf = feature[kConf];
-        Json &events = conf[0][kEvents];
+        const Json &conf = feature[kConf];
+        const Json &events = conf[0][kEvents];
         const std::string &ai_core_events = events[0][kAiCoreEvents];
         GELOGI("Op trace config from acl ai_core_events:%s", ai_core_events.c_str());
         is_op_trace_ = true;
-        // op trace get conf
         ProfMgrConf prof_mgr_conf;
         int result = ProfMgrGetConf(ai_core_events, &prof_mgr_conf);
         if (result != 0) {
@@ -154,10 +169,16 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
         GELOGI("Op trace profiling iter num %d,", op_trace_iter_num_);
       } else if (name == "task_trace") {
         is_op_trace_ = false;
+        if (feature.find(kConf) != feature.end()) {
+          const Json &conf = feature[kConf];
+          std::stringstream task_trace_conf;
+          task_trace_conf << conf;
+          task_trace_conf_ = task_trace_conf.str();
+        }
         GELOGI("Task trace config from acl");
       } else if (name == "system_trace") {
         is_op_trace_ = false;
-        Json &conf = feature[kConf];
+        const Json &conf = feature[kConf];
         std::stringstream system_trace_conf;
         system_trace_conf << conf;
         system_trace_conf_ = system_trace_conf.str();
@@ -165,10 +186,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::In
       }
       profiling_opts_.push_back(name);
     }
-
-    is_profiling_ = true;
   } catch (...) {
-    GELOGE(FAILED, "Json conf is not invalid !");
+    GELOGE(ge::PARAM_INVALID, "Json conf feature is not invalid !");
     return ge::PARAM_INVALID;
   }
 #endif
@@ -235,6 +254,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
       p_device[kDeviceID] = std::to_string(device_id);
       p_device[kJobID] = job_id_;
       p_device[kTraceID] = std::to_string(GetContext().TraceId());
+      if (!prof_dir_.empty()) {
+        p_device[kProfDir] = prof_dir_;
+        GELOGI("Prof dir: %s.", prof_dir_.c_str());
+      }
 
       Json features;
       if (is_op_trace_) {
@@ -258,6 +281,10 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
           Json f;
           if (profiling_opts_[i] == "system_trace") {
             f[kConf] = nlohmann::json::parse(system_trace_conf_);
+          } else if (profiling_opts_[i] == "task_trace") {
+            if (!task_trace_conf_.empty()) {
+              f[kConf] = nlohmann::json::parse(task_trace_conf_);
+            }
           }
           f[kName] = profiling_opts_[i];
           features[i] = f;
@@ -292,6 +319,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ge::Status ProfilingManager::St
       GELOGW("ProfMgrStartUp failed.");
       return FAILED;
     }
+    GELOGD("StartProfiling, prof_handle: %p", prof_handle);
     prof_handle_vec_.push_back(prof_handle);
   }
 #endif
@@ -314,8 +342,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::StopProf
   for (size_t i = 0; i < prof_handle_vec_.size(); ++i) {
     int result = ProfMgrStop(prof_handle_vec_[i]);
     if (result != 0) {
-      GELOGW("ProfMgr stop return fail:%d.", result);
-      return;
+      GELOGW("ProfMgr stop return fail:%d, handle:%p", result, prof_handle_vec_[i]);
     }
   }
   vector<void *>().swap(prof_handle_vec_);
diff --git a/src/ge/common/profiling/profiling_manager.h b/src/ge/common/profiling/profiling_manager.h
index 2dc0b407..26ee84ca 100644
--- a/src/ge/common/profiling/profiling_manager.h
+++ b/src/ge/common/profiling/profiling_manager.h
@@ -17,6 +17,7 @@
 #ifndef GE_COMMON_PROFILING_PROFILING_MANAGER_H_
 #define GE_COMMON_PROFILING_PROFILING_MANAGER_H_
 
+#include <nlohmann/json.hpp>
 #include <map>
 #include <string>
 #include <vector>
@@ -30,6 +31,7 @@
 using std::map;
 using std::string;
 using std::vector;
+using Json = nlohmann::json;
 
 namespace ge {
 const std::string GE_PROFILING_MODULE = "Framework";
@@ -84,11 +86,13 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   void PluginUnInit(const std::string &module) const;
 
  private:
+  ge::Status ParseFeaturesFromAclCfg(const Json &feature);
   bool is_profiling_ = false;
   bool is_op_trace_ = false;
   bool is_load_ = false;
   int32_t op_trace_iter_num_ = 0;
   string job_id_;
+  string prof_dir_;
   vector<int32_t> device_id_;
   vector<string> op_trace_conf_;
   vector<string> profiling_opts_;
@@ -96,6 +100,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
   string recv_profiling_config_;
   string send_profiling_config_;
   string system_trace_conf_;
+  string task_trace_conf_;
   const ProfilingEngineImpl engine_;
 };
 }  // namespace ge
diff --git a/src/ge/common/properties_manager.cc b/src/ge/common/properties_manager.cc
index 7321af9f..cf1ada05 100644
--- a/src/ge/common/properties_manager.cc
+++ b/src/ge/common/properties_manager.cc
@@ -208,6 +208,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY std::set<std::string> Propertie
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayerNeedDump(const std::string &model,
+                                                                                         const std::string &om_name,
                                                                                          const std::string &op_name) {
   std::lock_guard<std::mutex> lock(dump_mutex_);
   // if dump all
@@ -216,9 +217,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY bool PropertiesManager::IsLayer
   }
 
   // if this model need dump
-  auto model_iter = model_dump_properties_map_.find(model);
-  if (model_iter != model_dump_properties_map_.end()) {
+  auto om_name_iter = model_dump_properties_map_.find(om_name);
+  auto model_name_iter = model_dump_properties_map_.find(model);
+  if (om_name_iter != model_dump_properties_map_.end() || model_name_iter != model_dump_properties_map_.end()) {
     // if no dump layer info, dump all layer in this model
+    auto model_iter = om_name_iter != model_dump_properties_map_.end() ? om_name_iter : model_name_iter;
     if (model_iter->second.empty()) {
       return true;
     }
diff --git a/src/ge/common/properties_manager.h b/src/ge/common/properties_manager.h
index eb43820c..7cbb5949 100644
--- a/src/ge/common/properties_manager.h
+++ b/src/ge/common/properties_manager.h
@@ -84,7 +84,7 @@ class PropertiesManager {
   void AddDumpPropertyValue(const std::string &model, const std::set<std::string> &layers);
   std::set<std::string> GetAllDumpModel();
   std::set<std::string> GetDumpPropertyValue(const std::string &model);
-  bool IsLayerNeedDump(const std::string &model, const std::string &op_name);
+  bool IsLayerNeedDump(const std::string &model, const std::string &om_name, const std::string &op_name);
   void DeleteDumpPropertyValue(const std::string &model);
   void ClearDumpPropertyValue();
   bool QueryModelDumpStatus(const std::string &model);
diff --git a/src/ge/executor/ge_executor.cc b/src/ge/executor/ge_executor.cc
index 210eecd6..b5a3b3cf 100644
--- a/src/ge/executor/ge_executor.cc
+++ b/src/ge/executor/ge_executor.cc
@@ -452,7 +452,7 @@ Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData
 
 // Get input and output descriptor
 Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
-                                    std::vector<ge::TensorDesc> &output_desc) {
+                                    std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {
   GELOGI("get model desc info begin.");
   if (!isInit_) {
     GELOGE(GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
@@ -464,8 +464,8 @@ Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDes
   std::vector<uint32_t> input_formats;
   std::vector<uint32_t> output_formats;
 
-  Status ret =
-    GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats, output_formats);
+  Status ret = GraphExecutor::GetInputOutputDescInfo(model_id, input_desc_infos, output_desc_infos, input_formats,
+                                                     output_formats, new_model_desc);
   if (ret != domi::SUCCESS) {
     GELOGE(ret, "GetInputOutputDescInfo failed. ret = %u", ret);
     return TransferDomiErrorCode(ret);
@@ -641,7 +641,6 @@ Status GeExecutor::LoadDataFromFile(const std::string &path, ModelData &model_da
       model_data.model_data = nullptr;
     }
   }
-
   return ret;
 }
 
diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk
index e12989c0..2b26b214 100644
--- a/src/ge/ge_inference.mk
+++ b/src/ge/ge_inference.mk
@@ -1,5 +1,5 @@
 LOCAL_PATH := $(call my-dir)
-
+include $(LOCAL_PATH)/stub/Makefile
 COMMON_LOCAL_SRC_FILES := \
     proto/fusion_model.proto \
     proto/optimizer_priority.proto \
@@ -353,6 +353,28 @@ LOCAL_SHARED_LIBRARIES := \
 LOCAL_LDFLAGS := -lrt -ldl
 
 
+include $(BUILD_HOST_SHARED_LIBRARY)
+
+#compiler for host infer
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := stub/libge_compiler
+
+LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
+LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP
+ifeq ($(DEBUG), 1)
+LOCAL_CFLAGS += -g -O0
+endif
+
+LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
+
+LOCAL_SRC_FILES := ../../out/atc/lib64/stub/ge_ir_build.cc
+
+
+LOCAL_SHARED_LIBRARIES :=
+
+LOCAL_LDFLAGS := -lrt -ldl
+
 include $(BUILD_HOST_SHARED_LIBRARY)
 
 #compiler for device
diff --git a/src/ge/ge_local_engine/engine/host_cpu_engine.cc b/src/ge/ge_local_engine/engine/host_cpu_engine.cc
index 86f58b23..fd1b20d3 100644
--- a/src/ge/ge_local_engine/engine/host_cpu_engine.cc
+++ b/src/ge/ge_local_engine/engine/host_cpu_engine.cc
@@ -131,6 +131,7 @@ Status HostCpuEngine::RunInternal(const ge::OpDescPtr &op_desc, HostCpuOp &op_ke
     GELOGE(FAILED, "Failed to compute host cpu op. node = %s, ret = %u", op_desc->GetName().c_str(), ret);
     return FAILED;
   }
+  op.BreakConnect();
 
   return SUCCESS;
 }
diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc
index f0b69242..b01f7591 100644
--- a/src/ge/generator/ge_generator.cc
+++ b/src/ge/generator/ge_generator.cc
@@ -20,6 +20,7 @@
 #include "common/helper/model_helper.h"
 #include "common/helper/om_file_helper.h"
 #include "common/util.h"
+#include "common/util/error_manager/error_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "ge/ge_api.h"
 #include "graph/ge_context.h"
@@ -125,17 +126,7 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, GeTen
   if (data_op == nullptr) {
     return FAILED;
   }
-  auto op_desc = node->GetOpDesc();
-  GE_CHECK_NOTNULL_EXEC(op_desc, return PARAM_INVALID);
-  auto input_desc = op_desc->MutableInputDesc(index);
-  GE_CHECK_NOTNULL_EXEC(input_desc, return PARAM_INVALID);
-  ge::Format old_format = input_desc->GetFormat();
-  if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
-    input_desc->SetFormat(FORMAT_ND);
-    input_desc->SetOriginFormat(FORMAT_ND);
-    (void)AttrUtils::SetStr(data_op, "_single_input_format", TypeUtils::FormatToSerialString(old_format));
-    (void)AttrUtils::SetBool(data_op, "_is_single_op", true);
-  }
+  (void)AttrUtils::SetBool(data_op, "_is_single_op", true);
 
   GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
   GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail.");
@@ -157,17 +148,7 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons
   if (op_desc == nullptr) {
     return FAILED;
   }
-  auto single_op_desc = node->GetOpDesc();
-  GE_CHECK_NOTNULL_EXEC(single_op_desc, return PARAM_INVALID);
-  auto output_desc = single_op_desc->MutableOutputDesc(0);
-  GE_CHECK_NOTNULL_EXEC(output_desc, return PARAM_INVALID);
-  ge::Format old_format = output_desc->GetFormat();
-  if (old_format == FORMAT_FRACTAL_NZ || old_format == FORMAT_FRACTAL_Z) {
-    output_desc->SetFormat(FORMAT_ND);
-    output_desc->SetOriginFormat(FORMAT_ND);
-    (void)AttrUtils::SetStr(op_desc, "_single_output_format", TypeUtils::FormatToSerialString(old_format));
-    (void)AttrUtils::SetBool(op_desc, "_is_single_op", true);
-  }
+  (void)AttrUtils::SetBool(op_desc, "_is_single_op", true);
   int32_t count = 0;
   for (const auto &out_desc : outputs) {
     GeTensorDesc tensor = out_desc.GetTensorDesc();
@@ -212,19 +193,6 @@ static void GetOpsProtoPath(string &opsproto_path) {
   opsproto_path = (path_base + "ops/op_proto/custom/" + ":") + (path_base + "ops/op_proto/built-in/");
 }
 
-static string GetModelNameFromFileName(const string &file_name_prefix) {
-  int start_position = 0;
-  // using output as model_name (ignore ".om")
-  int filename_suffixes = 3;
-  if (file_name_prefix.find_last_of('/') != string::npos) {
-    start_position += 1;
-  }
-  int end_position = file_name_prefix.length() - filename_suffixes;
-  string model_name = file_name_prefix.substr(start_position, end_position - start_position);
-  GELOGI("Get model_name from file, model_name:%s", model_name.c_str());
-  return model_name;
-}
-
 class GeGenerator::Impl {
  public:
   Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GraphId &graph_id, GeRootModelPtr &ge_models);
@@ -332,8 +300,6 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
   GraphId graph_id;
   GeRootModelPtr ge_root_model = nullptr;
   GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
-  const string model_name = GetModelNameFromFileName(file_name_prefix);
-  GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(model_name.empty(), return PARAM_INVALID, "om name is not valid!");
   impl_->is_offline_ = is_offline;
   Status ret = impl_->BuildModel(graph, inputs, graph_id, ge_root_model);
   if (ret != SUCCESS) {
@@ -345,9 +311,15 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
   }
   GE_CHECK_NOTNULL(ge_root_model);
   GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
+  ModelHelper model_helper;
+  string model_name = "";
+  Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(), model_name);
+  if (name_ret != SUCCESS) {
+    GELOGE(FAILED, "Get model_name failed. Param --output is invalid");
+    return PARAM_INVALID;
+  }
   map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
   GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
-
   GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model can not be null");
   ge_model->SetName(model_name);
   ret = impl_->SaveModel(file_name_prefix, ge_model, model);
diff --git a/src/ge/graph/build/memory/block_mem_assigner.cc b/src/ge/graph/build/memory/block_mem_assigner.cc
index 602b71bd..df7912fa 100644
--- a/src/ge/graph/build/memory/block_mem_assigner.cc
+++ b/src/ge/graph/build/memory/block_mem_assigner.cc
@@ -38,6 +38,7 @@
 namespace {
 const char *const kAttrNameWorkspaceReuseFlag = "workspace_reuse_flag";
 const char *const kL2FusionDynamicConvergeOp = "l2fusion_dynamic_converge_op";
+const char *const kOpNoReuseMem = "no_reuse_mem_flag";
 const char *const kDisableReuseMemory = "ge.exec.disableReuseMemory";
 const char *const OP_NO_REUSE_MEM = "OP_NO_REUSE_MEM";
 const int kReuseMaxCount = 10;
@@ -624,8 +625,8 @@ MemoryBlock *BlockMemAssigner::ApplyMemory(size_t block_size, size_t real_size,
   (void)ge::GetContext().GetOption(kDisableReuseMemory, ge_disable_reuse_mem_env);
   if (ge_disable_reuse_mem_env != "1") {
     bool reuse_mem_flag = !((workspace_reuse_flag.size() > out_index) && !workspace_reuse_flag[out_index]);
-    is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && reuse_mem_flag && is_op_reuse_mem &&
-                      (IsPreReuse(n, out_index));
+    is_reuse_memory = !node_op_desc->HasAttr(kL2FusionDynamicConvergeOp) && !node_op_desc->HasAttr(kOpNoReuseMem) &&
+                      reuse_mem_flag && is_op_reuse_mem && (IsPreReuse(n, out_index));
     auto stream_id = node_op_desc->GetStreamId();
     auto map_iter = reusable_streams_map_.find(stream_id);
     if (is_reuse_memory && map_iter != reusable_streams_map_.end()) {
@@ -1182,6 +1183,9 @@ void ReAssignContinuousBlocks(const std::vector<MemoryBlock *> &org_blocks,
 
     GELOGI("Block continuous input index:%d", memory_block->input_index_);
     count++;
+    if (count == 1) {
+      memory_block->first_continuous_block_ = true;
+    }
     if (count == continuous_blocks.size()) {
       memory_block->last_continuous_block_ = true;
     }
@@ -1242,6 +1246,10 @@ void BlockMemAssigner::ResizeMemoryBlocks() {
     if (memory_block == nullptr || memory_block->deleted_block_ || memory_block->is_zero_copy_) {
       continue;
     }
+    if (memory_block->first_continuous_block_) {
+      mem_offset_ += MEM_ALIGN_SIZE;
+    }
+
     memory_block->Resize();
     memory_block->SetHeadOffset(mem_offset_);
     mem_offset_ += memory_block->Size();
diff --git a/src/ge/graph/build/memory/block_mem_assigner.h b/src/ge/graph/build/memory/block_mem_assigner.h
index 14aba576..8ee4506e 100644
--- a/src/ge/graph/build/memory/block_mem_assigner.h
+++ b/src/ge/graph/build/memory/block_mem_assigner.h
@@ -64,6 +64,7 @@ class MemoryBlock {
         reuse_mem_(reuse_mem),
         input_index_(0),
         continuous_block_(false),
+        first_continuous_block_(false),
         last_continuous_block_(false),
         is_zero_copy_(false),
         block_size_(block_size),
@@ -129,6 +130,7 @@ class MemoryBlock {
   bool reuse_mem_;
   uint32_t input_index_;
   bool continuous_block_;
+  bool first_continuous_block_;
   bool last_continuous_block_;
   bool is_zero_copy_;
   std::map<int64_t, size_t> depend_stream_life_;
diff --git a/src/ge/graph/build/memory/graph_mem_assigner.cc b/src/ge/graph/build/memory/graph_mem_assigner.cc
index 931ebba4..c4aca639 100644
--- a/src/ge/graph/build/memory/graph_mem_assigner.cc
+++ b/src/ge/graph/build/memory/graph_mem_assigner.cc
@@ -446,6 +446,7 @@ Status GraphMemoryAssigner::AssignContinuousOutputMemory(const ge::NodePtr &node
     return ge::FAILED;
   }
 
+  memory_offset_[0].mem_offset_ += MEM_ALIGN_SIZE;
   for (auto &out_data_anchor : node->GetAllOutDataAnchors()) {
     output_list[out_data_anchor->GetIdx()] = memory_offset_[0].mem_offset_;
     size_t pre_mem_offset = memory_offset_[0].mem_offset_;
diff --git a/src/ge/graph/execute/graph_execute.cc b/src/ge/graph/execute/graph_execute.cc
index 9293b9af..b021ce55 100644
--- a/src/ge/graph/execute/graph_execute.cc
+++ b/src/ge/graph/execute/graph_execute.cc
@@ -450,11 +450,13 @@ Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<Inp
 
 Status GraphExecutor::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
                                              vector<InputOutputDescInfo> &output_desc,
-                                             std::vector<uint32_t> &input_formats, std::vector<uint32_t> &out_formats) {
+                                             std::vector<uint32_t> &input_formats, std::vector<uint32_t> &out_formats,
+                                             bool new_model_desc) {
   try {
     auto model_manager = ge::ModelManager::GetInstance();
     GE_CHECK_NOTNULL(model_manager);
-    Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats);
+    Status ret = model_manager->GetInputOutputDescInfo(model_id, input_desc, output_desc, input_formats, out_formats,
+                                                       new_model_desc);
     if (ret != SUCCESS) {
       GELOGE(ret, "GetInputOutputDescInfo  failed.");
       CsaInteract::GetInstance().WriteErrorCode(ret, ERROR_MODULE_FMK, JOBSUBSTATE_GRAPH_EXEC);
diff --git a/src/ge/graph/execute/graph_execute.h b/src/ge/graph/execute/graph_execute.h
index ae467515..0518cf11 100644
--- a/src/ge/graph/execute/graph_execute.h
+++ b/src/ge/graph/execute/graph_execute.h
@@ -71,7 +71,7 @@ class GraphExecutor {
 
   static Status GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
                                        vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &input_formats,
-                                       std::vector<uint32_t> &output_formats);
+                                       std::vector<uint32_t> &output_formats, bool new_model_desc = false);
 
   static Status GetAIPPInfo(uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info);
 
diff --git a/src/ge/graph/load/new_model_manager/data_dumper.cc b/src/ge/graph/load/new_model_manager/data_dumper.cc
index 47f6ffcf..653a3fa1 100644
--- a/src/ge/graph/load/new_model_manager/data_dumper.cc
+++ b/src/ge/graph/load/new_model_manager/data_dumper.cc
@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "common/debug/log.h"
 #include "common/properties_manager.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/common/util.h"
@@ -28,6 +29,7 @@
 #include "graph/debug/ge_attr_define.h"
 #include "graph/load/new_model_manager/model_utils.h"
 #include "graph/utils/attr_utils.h"
+#include "graph/utils/tensor_utils.h"
 #include "proto/ge_ir.pb.h"
 #include "proto/op_mapping_info.pb.h"
 #include "runtime/mem.h"
@@ -106,6 +108,7 @@ void DataDumper::SetLoopAddr(void *global_step, void *loop_per_iter, void *loop_
 }
 
 void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
+  GELOGI("Start to save data %s message", node->GetName().c_str());
   if (node != nullptr) {
     auto input_op_desc = node->GetOpDesc();
     if (input_op_desc == nullptr) {
@@ -126,6 +129,7 @@ void DataDumper::SaveDumpInput(const std::shared_ptr<Node> &node) {
           {op_desc->GetName(), {input_op_desc, dst_in_data_anchor->GetIdx(), out_data_anchor->GetIdx()}});
       }
     }
+    GELOGI("Save data message successfully");
   }
 }
 
@@ -159,30 +163,39 @@ void DataDumper::SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::s
       return;
     }
 
-    GELOGI("Save input dump task %s, id: %u.", data_op->GetName().c_str(), task_id);
+    int64_t data_size = 0;
+    if (AttrUtils::GetInt(input_tensor, ATTR_NAME_INPUT_ORIGIN_SIZE, data_size)) {
+      GELOGI("Get aipp data size according to attr is %ld", data_size);
+    } else if (TensorUtils::GetTensorSizeInBytes(*input_tensor, data_size) != SUCCESS) {
+      GELOGE(PARAM_INVALID, "Get input size filed");
+      return;
+    }
+
+    GELOGI("Save input dump task %s, id: %u,stream id :%u,data size :%ld", data_op->GetName().c_str(), task_id,
+           stream_id, data_size);
     op_list_.push_back({task_id, stream_id, data_op, args, false, inner_input_mapping.input_anchor_index,
-                        inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims()});
+                        inner_input_mapping.output_anchor_index, input_tensor->GetShape().GetDims(), data_size});
   }
 }
 
 static void SetOpMappingLoopAddr(uintptr_t step_id, uintptr_t loop_per_iter, uintptr_t loop_cond,
                                  aicpu::dump::OpMappingInfo &op_mapping_info) {
   if (step_id != 0) {
-    GELOGI("step_id exist.");
+    GELOGI("step_id exists.");
     op_mapping_info.set_step_id_addr(static_cast<uint64_t>(step_id));
   } else {
     GELOGI("step_id is null.");
   }
 
   if (loop_per_iter != 0) {
-    GELOGI("loop_per_iter exist.");
+    GELOGI("loop_per_iter exists.");
     op_mapping_info.set_iterations_per_loop_addr(static_cast<uint64_t>(loop_per_iter));
   } else {
     GELOGI("loop_per_iter is null.");
   }
 
   if (loop_cond != 0) {
-    GELOGI("loop_cond exist.");
+    GELOGI("loop_cond exists.");
     op_mapping_info.set_loop_cond_addr(static_cast<uint64_t>(loop_cond));
   } else {
     GELOGI("loop_cond is null.");
@@ -211,10 +224,19 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
         output.mutable_shape()->add_dim(dim);
       }
 
+      int64_t output_size = 0;
+      if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
+        GELOGE(PARAM_INVALID, "Get output size filed");
+        return PARAM_INVALID;
+      }
+      GELOGI("Get output size in dump is %ld", output_size);
       std::string origin_name;
       int32_t origin_output_index = -1;
       (void)AttrUtils::GetStr(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
       (void)AttrUtils::GetInt(&output_descs.at(i), ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
+      GE_IF_BOOL_EXEC(output_size <= 0, GELOGE(PARAM_INVALID, "Output size %ld is less than zero", output_size);
+                      return PARAM_INVALID)
+      output.set_size(output_size);
       output.set_original_name(origin_name);
       output.set_original_output_index(origin_output_index);
       output.set_original_output_format(static_cast<int32_t>(output_descs.at(i).GetOriginFormat()));
@@ -247,6 +269,10 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
   int32_t origin_output_index = -1;
   (void)AttrUtils::GetStr(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_NAME, origin_name);
   (void)AttrUtils::GetInt(output_tensor, ATTR_NAME_DATA_DUMP_ORIGIN_OUTPUT_INDEX, origin_output_index);
+  GE_IF_BOOL_EXEC(inner_dump_info.data_size <= 0,
+                  GELOGE(PARAM_INVALID, "The size of data %ld is less than zero", inner_dump_info.data_size);
+                  return PARAM_INVALID)
+  output.set_size(inner_dump_info.data_size);
   output.set_original_name(origin_name);
   output.set_original_output_index(origin_output_index);
   output.set_original_output_format(static_cast<int32_t>(output_tensor->GetOriginFormat()));
@@ -283,6 +309,17 @@ Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::
       input.mutable_shape()->add_dim(dim);
     }
 
+    int64_t input_size = 0;
+    if (AttrUtils::GetInt(&input_descs.at(i), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
+      GELOGI("Get aipp input size according to attr is %ld", input_size);
+    } else if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
+      GELOGE(PARAM_INVALID, "Get input size filed");
+      return PARAM_INVALID;
+    }
+    GELOGI("Get input size in dump is %ld", input_size);
+    GE_IF_BOOL_EXEC(input_size <= 0, GELOGE(PARAM_INVALID, "Input size %ld is less than zero", input_size);
+                    return PARAM_INVALID;)
+    input.set_size(input_size);
     input.set_address(static_cast<uint64_t>(inner_dump_info.args + sizeof(void *) * i));
     task.mutable_input()->Add(std::move(input));
   }
@@ -323,7 +360,7 @@ Status DataDumper::ExecuteLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_in
   }
 
   load_flag_ = true;
-  GELOGI("LoadDumpInfo success, proto size: %zu.", proto_size);
+  GELOGI("LoadDumpInfo success, proto size is: %zu.", proto_size);
   return SUCCESS;
 }
 
@@ -360,11 +397,12 @@ Status DataDumper::ExecuteUnLoadDumpInfo(aicpu::dump::OpMappingInfo &op_mapping_
     return RT_FAILED;
   }
   load_flag_ = false;
-  GELOGI("UnloadDumpInfo success, proto size: %zu.", proto_size);
+  GELOGI("UnloadDumpInfo success, proto size is: %zu.", proto_size);
   return SUCCESS;
 }
 Status DataDumper::LoadDumpInfo() {
-  PrintCheckLog();
+  std::string dump_list_key;
+  PrintCheckLog(dump_list_key);
 
   if (op_list_.empty()) {
     return SUCCESS;
@@ -374,12 +412,13 @@ Status DataDumper::LoadDumpInfo() {
 
   auto dump_path = PropertiesManager::Instance().GetDumpOutputPath();
   op_mapping_info.set_dump_path(PropertiesManager::Instance().GetDumpOutputPath() + std::to_string(device_id_) + "/");
-  op_mapping_info.set_model_name(model_name_);
+  op_mapping_info.set_model_name(dump_list_key);
   op_mapping_info.set_model_id(model_id_);
   op_mapping_info.set_flag(kAicpuLoadFlag);
   op_mapping_info.set_dump_step(PropertiesManager::Instance().GetDumpStep());
   SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
-  GELOGD("Dump step in load dump info is %s", PropertiesManager::Instance().GetDumpStep().c_str());
+  GELOGI("Dump step is %s and dump path  is %s in load dump info", PropertiesManager::Instance().GetDumpStep().c_str(),
+         dump_path.c_str());
 
   for (const auto &op_iter : op_list_) {
     aicpu::dump::Task task;
@@ -441,7 +480,7 @@ void DataDumper::SetEndGraphIdToAicpu(uint32_t task_id, uint32_t stream_id,
   if (PropertiesManager::Instance().GetDumpMode() == kDumpOutput ||
       PropertiesManager::Instance().GetDumpMode() == kDumpInput ||
       PropertiesManager::Instance().GetDumpMode() == kDumpAll) {
-    GELOGI("add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
+    GELOGI("Add end_graph_info to aicpu, task_id is %u, stream_id is %u", end_graph_task_id_, end_graph_stream_id_);
     aicpu::dump::Task task;
     task.set_end_graph(true);
     task.set_task_id(end_graph_task_id_);
@@ -477,7 +516,7 @@ Status DataDumper::UnloadDumpInfo() {
   return SUCCESS;
 }
 
-void DataDumper::PrintCheckLog() {
+void DataDumper::PrintCheckLog(string &dump_list_key) {
   std::set<std::string> model_list = PropertiesManager::Instance().GetAllDumpModel();
   if (model_list.empty()) {
     GELOGI("No model need dump.");
@@ -485,19 +524,21 @@ void DataDumper::PrintCheckLog() {
   }
 
   GELOGI("%zu op need dump in %s.", op_list_.size(), model_name_.c_str());
-  if (model_list.find(ge::DUMP_ALL_MODEL) == model_list.end()) {
-    if (model_list.find(model_name_) == model_list.end()) {
+  bool not_find_by_omname = model_list.find(om_name_) == model_list.end();
+  bool not_find_by_modelname = model_list.find(model_name_) == model_list.end();
+  if (model_list.find(DUMP_ALL_MODEL) == model_list.end()) {
+    if (not_find_by_omname && not_find_by_modelname) {
       std::string model_list_str;
       for (auto &model : model_list) {
         model_list_str += "[" + model + "].";
       }
 
-      GELOGW("Model %s not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str());
+      GELOGW("Model %s will not be set to dump, dump list: %s", model_name_.c_str(), model_list_str.c_str());
       return;
     }
   }
-
-  std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(model_name_);
+  dump_list_key = not_find_by_omname ? model_name_ : om_name_;
+  std::set<std::string> config_dump_op_list = PropertiesManager::Instance().GetDumpPropertyValue(dump_list_key);
   std::set<std::string> dump_op_list;
   for (auto &inner_dump_info : op_list_) {
     // oplist value OpDescPtr is not nullptr
@@ -506,7 +547,7 @@ void DataDumper::PrintCheckLog() {
 
   for (auto &dump_op : config_dump_op_list) {
     if (dump_op_list.find(dump_op) == dump_op_list.end()) {
-      GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), model_name_.c_str());
+      GELOGW("Op %s set to dump but not exist in model %s or not a valid op.", dump_op.c_str(), dump_list_key.c_str());
     }
   }
 }
diff --git a/src/ge/graph/load/new_model_manager/data_dumper.h b/src/ge/graph/load/new_model_manager/data_dumper.h
index efcc989a..ee5b3241 100644
--- a/src/ge/graph/load/new_model_manager/data_dumper.h
+++ b/src/ge/graph/load/new_model_manager/data_dumper.h
@@ -64,6 +64,8 @@ class DataDumper {
   void SaveDumpTask(uint32_t task_id, uint32_t stream_id, const std::shared_ptr<OpDesc> &op_desc, uintptr_t args);
   void SaveEndGraphId(uint32_t task_id, uint32_t stream_id);
 
+  void SetOmName(const std::string &om_name) { om_name_ = om_name; }
+
   Status LoadDumpInfo();
 
   Status UnloadDumpInfo();
@@ -71,9 +73,13 @@ class DataDumper {
  private:
   void ReleaseDevMem(void **ptr) noexcept;
 
-  void PrintCheckLog();
+  void PrintCheckLog(string &dump_list_key);
 
   std::string model_name_;
+
+  // for inference data dump
+  std::string om_name_;
+
   uint32_t model_id_;
   RuntimeParam runtime_param_;
   void *dev_mem_load_;
@@ -107,6 +113,7 @@ struct DataDumper::InnerDumpInfo {
   int input_anchor_index;
   int output_anchor_index;
   std::vector<int64_t> dims;
+  int64_t data_size;
 };
 
 struct DataDumper::InnerInputMapping {
diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc
index 46dd8201..d1f75062 100644
--- a/src/ge/graph/load/new_model_manager/davinci_model.cc
+++ b/src/ge/graph/load/new_model_manager/davinci_model.cc
@@ -78,7 +78,7 @@ namespace {
 const uint32_t kDataIndex = 0;
 const uint32_t kOutputNum = 1;
 const uint32_t kTrueBranchStreamNum = 1;
-const uint32_t kThreadNum = 16;
+const uint32_t kThreadNum = 1;
 const uint32_t kAddrLen = sizeof(void *);
 const char *const kNeedDestroySpecifiedAicpuKernel = "need_destroy_specified_aicpu_kernel";
 const int kDecimal = 10;
@@ -94,42 +94,9 @@ inline bool IsCallDumpInputOp(const OpDescPtr &op_desc) {
   (void)ge::AttrUtils::GetBool(op_desc, ATTR_NO_TASK_AND_DUMP_NEEDED, skip_task_generate);
   return skip_task_generate;
 }
-
-void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) {
-  uint32_t n, c, h, w;
-  n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N;
-  c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C;
-  h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H;
-  w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W;
-
-  if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) {
-    if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) {
-      input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n);
-      input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h);
-      input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w);
-      input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c);
-    }
-    for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) {
-      input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k));
-    }
-  } else {
-    vector<int64_t> origin_input_dims;
-    (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims);
-    if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) {
-      input.shape_info.num = origin_input_dims[n];
-      input.shape_info.height = origin_input_dims[h];
-      input.shape_info.width = origin_input_dims[w];
-      input.shape_info.channel = origin_input_dims[c];
-    }
-    for (size_t k = 0; k < origin_input_dims.size(); ++k) {
-      input.shape_info.dims.push_back(origin_input_dims[k]);
-    }
-  }
-}
 }  // namespace
 
 std::mutex DavinciModel::tvm_bin_mutex_;
-std::set<std::string> DavinciModel::tvm_bin_kernel_;
 
 DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener> &listener)
     : weights_mem_base_(nullptr),
@@ -536,7 +503,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
   compute_graph_ = GraphUtils::GetComputeGraph(graph);
   GE_CHK_BOOL_RET_STATUS(compute_graph_ != nullptr, INTERNAL_ERROR, "Get compute graph is nullptr.");
 
-  runtime_param_.graph_id = GetGraphID(compute_graph_->GetName());
+  runtime_param_.graph_id = compute_graph_->GetGraphID();
 
   GE_TIMESTAMP_START(TransAllVarData);
   GE_CHK_STATUS_RET(TransAllVarData(compute_graph_, runtime_param_.graph_id), "TransAllVarData failed.");
@@ -1447,6 +1414,55 @@ Status DavinciModel::GetInputOutputDescInfoForZeroCopy(vector<InputOutputDescInf
   return SUCCESS;
 }
 
+void DavinciModel::CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input) {
+  uint32_t n, c, h, w;
+  n = format == FORMAT_NHWC ? NHWC_DIM_N : NCHW_DIM_N;
+  c = format == FORMAT_NHWC ? NHWC_DIM_C : NCHW_DIM_C;
+  h = format == FORMAT_NHWC ? NHWC_DIM_H : NCHW_DIM_H;
+  w = format == FORMAT_NHWC ? NHWC_DIM_W : NCHW_DIM_W;
+
+  if (is_new_model_desc_ && op_desc->HasAttr(ATTR_NAME_INPUT_DIMS)) {
+    // When static aipp is set, need to get the model input dims which processed by aipp
+    vector<int64_t> model_input_dims;
+    (void)AttrUtils::GetListInt(op_desc, ATTR_NAME_INPUT_DIMS, model_input_dims);
+    if (model_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) {
+      input.shape_info.num = model_input_dims[n];
+      input.shape_info.height = model_input_dims[h];
+      input.shape_info.width = model_input_dims[w];
+      input.shape_info.channel = model_input_dims[c];
+    }
+    for (size_t k = 0; k < model_input_dims.size(); ++k) {
+      input.shape_info.dims.push_back(model_input_dims[k]);
+    }
+    is_new_model_desc_ = false;
+    return;
+  }
+
+  if (!op_desc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) {
+    if (op_desc->GetInputDescPtr(0)->GetShape().GetDimNum() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) {
+      input.shape_info.num = op_desc->GetInputDescPtr(0)->GetShape().GetDim(n);
+      input.shape_info.height = op_desc->GetInputDescPtr(0)->GetShape().GetDim(h);
+      input.shape_info.width = op_desc->GetInputDescPtr(0)->GetShape().GetDim(w);
+      input.shape_info.channel = op_desc->GetInputDescPtr(0)->GetShape().GetDim(c);
+    }
+    for (size_t k = 0; k < op_desc->GetInputDescPtr(0)->GetShape().GetDimNum(); k++) {
+      input.shape_info.dims.push_back(op_desc->GetInputDescPtr(0)->GetShape().GetDim(k));
+    }
+  } else {
+    vector<int64_t> origin_input_dims;
+    (void)AttrUtils::GetListInt(op_desc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims);
+    if (origin_input_dims.size() == static_cast<size_t>(NORMAL_TENSOR_SIZE)) {
+      input.shape_info.num = origin_input_dims[n];
+      input.shape_info.height = origin_input_dims[h];
+      input.shape_info.width = origin_input_dims[w];
+      input.shape_info.channel = origin_input_dims[c];
+    }
+    for (size_t k = 0; k < origin_input_dims.size(); ++k) {
+      input.shape_info.dims.push_back(origin_input_dims[k]);
+    }
+  }
+}
+
 Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats) {
   for (size_t index = 0; index < data_op_list_.size(); ++index) {
     InputOutputDescInfo input;
@@ -1455,6 +1471,7 @@ Status DavinciModel::GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, s
 
     Format format = data_op_list_[index]->GetInputDescPtr(0)->GetFormat();
     CreateInputDimsInfo(data_op_list_[index], format, input);
+
     input.data_type = data_op_list_[index]->GetInputDescPtr(0)->GetDataType();
     input.name = data_op_list_[index]->GetName();
     int64_t input_size = 0;
@@ -1535,7 +1552,10 @@ Status DavinciModel::GetOutputDescInfo(vector<InputOutputDescInfo> &output_desc,
                              "construct output_name failed.");
       // forward compatbility, if old om has no out_node_name, need to return output follow origin way
       if (out_size == out_node_name.size()) {
-        output_name = out_node_name[index] + ":" + std::to_string(src_index[index]);
+        // neweast plan, the index will add to name during generate model.
+        bool contains_colon = out_node_name[index].find(":") != std::string::npos;
+        output_name =
+          contains_colon ? out_node_name[index] : out_node_name[index] + ":" + std::to_string(src_index[index]);
       } else {
         output_name = std::string("output_") + std::to_string(index) + "_" + src_name[index] + "_" +
                       std::to_string(src_index[index]);
@@ -1966,6 +1986,10 @@ Status DavinciModel::CopyOutputDataToUser(OpDescPtr &op_desc, std::vector<DataBu
                            "Model output data size(%u) does not match required size(%u).", v_output_size[i],
                            data_buf.length);
 
+    if (copy_only_addrs_.count(v_output_data_addr[i]) == 0) {
+      GELOGI("[ZCPY] This addr[%p] has already feed by zero copy.", v_output_data_addr[i]);
+      continue;  // Skip: Feed by zero copy.
+    }
     GELOGI(
       "CopyOutputDataToUser memcpy graph_%u type[F] name[%s] output[%lu] dst[%p] src[%p] mem_size[%u] datasize[%u]",
       runtime_param_.graph_id, op_desc->GetName().c_str(), i, data_buf.data, v_output_data_addr[i], data_buf.length,
@@ -2510,51 +2534,19 @@ Status DavinciModel::UpdateKnownNodeArgs(const vector<void *> &inputs, const vec
 }
 
 Status DavinciModel::InitTaskInfo(domi::ModelTaskDef &model_task_def) {
-  GELOGI("InitTaskInfo in,task size %zu", model_task_def.task().size());
+  GELOGI("InitTaskInfo in,task size %d", model_task_def.task().size());
   task_list_.resize(model_task_def.task_size());
-  std::vector<std::future<Status>> futures(model_task_def.task_size());
-  ThreadPool executor(kThreadNum);
-  rtContext_t ctx = nullptr;
-  rtError_t rt_ret = rtCtxGetCurrent(&ctx);
-  if (rt_ret != RT_ERROR_NONE || ctx == nullptr) {
-    GELOGE(RT_FAILED, "Failed to get current context from rt, error-code 0x%X.", rt_ret);
-    return RT_FAILED;
-  }
-
-  for (int32_t i = 0; i < model_task_def.task_size(); ++i) {
-    std::future<Status> f = executor.commit(
-      [](const domi::TaskDef &task, DavinciModel *model, rtContext_t ctx, int32_t idx) -> Status {
-        rtError_t rt_ret = rtCtxSetCurrent(ctx);
-        if (rt_ret != RT_ERROR_NONE) {
-          GELOGE(RT_FAILED, "Failed to set context from rt, error-code 0x%X.", rt_ret);
-          return RT_FAILED;
-        }
-        Status ret = FAILED;
-        // dynamic shape will create task_list_ before
-        if (model->task_list_[idx] == nullptr) {
-          model->task_list_[idx] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type()));
-          GE_CHECK_NOTNULL(model->task_list_[idx]);
-        }
-        ret = model->task_list_[idx]->Init(task, model);
-        return ret;
-      },
-      model_task_def.task(i), this, ctx, i);
-    if (!f.valid()) {
-      GELOGE(FAILED, "Future is invalid");
-      return FAILED;
-    }
-    futures[i] = std::move(f);
-  }
-
-  Status ret;
-  for (size_t i = 0; i < futures.size(); ++i) {
-    ret = futures[i].get();
+  for (int i = 0; i < model_task_def.task_size(); ++i) {
+    // dynamic shape will create task_list_ before
+    const domi::TaskDef &task = model_task_def.task(i);
+    task_list_[i] = TaskInfoFactory::Instance().Create(static_cast<rtModelTaskType_t>(task.type()));
+    GE_CHECK_NOTNULL(task_list_[i]);
+    Status ret = task_list_[i]->Init(task, this);
     if (ret != SUCCESS) {
-      GELOGE(ret, "Task index %zu init failed.", i);
+      GELOGE(ret, "Task index %d init failed.", i);
       return ret;
     }
   }
-
   GELOGI("InitTaskInfo out");
   return SUCCESS;
 }
@@ -2623,7 +2615,7 @@ Status DavinciModel::DistributeTask() {
         return PARAM_INVALID;
       }
 
-      if (PropertiesManager::Instance().IsLayerNeedDump(name_, op->GetName())) {
+      if (PropertiesManager::Instance().IsLayerNeedDump(name_, om_name_, op->GetName())) {
         SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
       }
     }
@@ -2661,8 +2653,9 @@ Status DavinciModel::DistributeTask() {
 
 void DavinciModel::SetEndGraphId(uint32_t task_id, uint32_t stream_id) {
   auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel();
-  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
-      all_dump_model.find(name_) != all_dump_model.end()) {
+  bool findByOmName = all_dump_model.find(om_name_) != all_dump_model.end();
+  bool findByModelName = all_dump_model.find(name_) != all_dump_model.end();
+  if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() || findByOmName || findByModelName) {
     GELOGI("start save end_graph_info to dumper, task_id is %u, stream_id is %u", task_id, stream_id);
     data_dumper_.SaveEndGraphId(task_id, stream_id);
   }
@@ -2696,7 +2689,7 @@ void DavinciModel::SetOutputOutsideAddr(const std::vector<void *> &outside_addrs
     if (output_outside_addrs_.find(addr) != output_outside_addrs_.end()) {
       continue;
     }
-
+    DisableZeroCopy(addr);  // Data to NetOutput directly.
     (void)output_outside_addrs_.emplace(std::pair<const void *, std::vector<void *>>(addr, {}));
     GELOGI("SetOutputOutsideAddr success.");
   }
@@ -2902,11 +2895,15 @@ Status DavinciModel::UpdateIoTaskArgs(const map<uint32_t, pair<int64_t, void *>>
     }
 
     // For input data, just copy for rts task.
-    if (is_input && copy_only_addrs_.count(addr) > 0) {
-      if (rtMemcpy(addr, size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) {
-        GELOGE(FAILED, "Non-zero copy data node copy failed");
-        return FAILED;
+    if (copy_only_addrs_.count(addr) > 0) {
+      if (is_input) {
+        GELOGI("[IMAS] Find addr %p need direct copy from user malloc input %p.", addr, buffer.data);
+        if (rtMemcpy(addr, size, buffer.data, buffer.length, RT_MEMCPY_DEVICE_TO_DEVICE) != RT_ERROR_NONE) {
+          GELOGE(FAILED, "Non-zero copy data node copy failed");
+          return FAILED;
+        }
       }
+      GELOGI("No need to exeucte zero copy task because this addr %p need direct copy.", addr);
       continue;
     }
 
@@ -2953,7 +2950,6 @@ const char *DavinciModel::GetRegisterStub(const string &binfile, const string &s
   } else {
     binfile_key = session_graph_id + "_" + binfile;
   }
-  std::lock_guard<std::mutex> lock(tvm_bin_mutex_);
   auto it = tvm_bin_kernel_.find(binfile_key);
   if (it != tvm_bin_kernel_.end()) {
     return it->c_str();
@@ -3089,7 +3085,6 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) {
   // Online mode FE may call rtFunctionRegister.
   TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
 
-  // Need protection of tvm_bin_mutex_.
   auto it = used_tbe_handle_map_.find(handle_key);
   if (it != used_tbe_handle_map_.end()) {
     // GE registered, increase reference.
@@ -3109,9 +3104,9 @@ void DavinciModel::StoreTbeHandle(const std::string &handle_key) {
 void DavinciModel::CleanTbeHandle() {
   TBEHandleStore &kernel_store = TBEHandleStore::GetInstance();
 
-  std::lock_guard<std::mutex> lock(tvm_bin_mutex_);
   kernel_store.EraseTBEHandle(used_tbe_handle_map_);
   used_tbe_handle_map_.clear();
+  tvm_bin_kernel_.clear();
 }
 
 ///
@@ -3246,15 +3241,8 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
   bool is_dynamic_batch = input_data.is_dynamic_batch;
   InitZeroCopyUtil(is_dynamic_batch, input_use_zero_copy, output_use_zero_copy);
 
-  // Empty task, Just copy input to output, need direct copy.
-  if (task_list_.empty() && (input_use_zero_copy || output_use_zero_copy)) {
-    GELOGE(FAILED, "Empty task, Just copy input to output, need direct copy.");
-    return FAILED;
-  }
-
   GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_PRE_PROC_START));
-  Status ret =
-    input_use_zero_copy ? CopyModelData(input_data, output_data, is_dynamic_batch) : CopyInputData(input_data, true);
+  Status ret = CopyModelData(input_data, output_data, is_dynamic_batch);
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy input data to model failed.");
 
   GELOGI("current_data.index=%u", input_data.index);
@@ -3271,7 +3259,7 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputDa
 
   if (!is_async_mode_) {
     GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_START));
-    ret = output_use_zero_copy ? SyncDataAndDump() : CopyOutputData(input_data.index, output_data);
+    ret = CopyOutputData(input_data.index, output_data);
     GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return INTERNAL_ERROR, "Copy Output data to user failed.");
     GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingOn(), SetProfileTime(MODEL_AFTER_PROC_END));
   }
@@ -3344,17 +3332,6 @@ void DavinciModel::FreeWeightsMem() {
   }
 }
 
-uint32_t DavinciModel::GetGraphID(const std::string &session_graph_id) {
-  std::string session_id = "_";
-  auto pos = session_graph_id.find(session_id);
-  if (pos != std::string::npos) {
-    size_t graph_id_length = session_graph_id.length() - pos - session_id.length();
-    std::string graph_id = session_graph_id.substr(pos + session_id.length(), graph_id_length);
-    return static_cast<uint32_t>(std::strtol(graph_id.c_str(), nullptr, kDecimal));
-  }
-  return 0;
-}
-
 Status DavinciModel::TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id) {
   GELOGI("TransAllVarData start: session_id:%lu, graph_id: %u.", session_id_, graph_id);
   rtContext_t ctx = nullptr;
@@ -3387,6 +3364,7 @@ void DavinciModel::SetDataDumperArgs() {
   data_dumper_.SetModelName(name_);
   data_dumper_.SetModelId(model_id_);
   data_dumper_.SetMemory(runtime_param_);
+  data_dumper_.SetOmName(om_name_);
 
   int32_t device_id = 0;
   rtError_t rt_ret = rtGetDevice(&device_id);
diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h
index 067fa112..9f65fbc4 100644
--- a/src/ge/graph/load/new_model_manager/davinci_model.h
+++ b/src/ge/graph/load/new_model_manager/davinci_model.h
@@ -187,6 +187,8 @@ class DavinciModel {
   // model name
   string Name() { return name_; }
 
+  // om_name
+  string OmName() { return om_name_; }
   // version
   uint32_t Version() const { return version_; }
 
@@ -273,7 +275,7 @@ class DavinciModel {
   /// @brief For TVM Op, avoid Addr Reuse.
   /// @return void*
   ///
-  static const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = "");
+  const char *GetRegisterStub(const string &tvm_binfile_key, const string &session_graph_model_id = "");
 
   ///
   /// @ingroup ge
@@ -471,6 +473,9 @@ class DavinciModel {
   Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
   Status GetAllAippInputOutputDims(uint32_t index, std::vector<InputOutputDims> &input_dims,
                                    std::vector<InputOutputDims> &output_dims);
+  void SetModelDescVersion(bool is_new_model_desc) { is_new_model_desc_ = is_new_model_desc; }
+  // om file name
+  void SetOmName(string om_name) { om_name_ = om_name; }
 
  private:
   // memory address of weights
@@ -560,6 +565,8 @@ class DavinciModel {
 
   Status InitModelMem(void *dev_ptr, size_t memsize, void *weight_ptr, size_t weightsize);
 
+  void CreateInputDimsInfo(const OpDescPtr &op_desc, Format format, InputOutputDescInfo &input);
+
   Status GetInputDescInfo(vector<InputOutputDescInfo> &input_desc, std::vector<uint32_t> &formats);
 
   Status InitTaskInfo(domi::ModelTaskDef &modelTaskInfo);
@@ -752,8 +759,6 @@ class DavinciModel {
 
   void CreateOutput(uint32_t index, OpDescPtr &op_desc, InputOutputDescInfo &output, uint32_t &format_result);
 
-  uint32_t GetGraphID(const std::string &session_graph_id);
-
   Status TransAllVarData(ComputeGraphPtr &graph, uint32_t graph_id);
   Status CopyVarData(ComputeGraphPtr &graph);
 
@@ -771,6 +776,10 @@ class DavinciModel {
   uint32_t model_id_;
   uint32_t runtime_model_id_;
   string name_;
+
+  // used for inference data dump
+  string om_name_;
+
   uint32_t version_;
   GeModelPtr ge_model_;
 
@@ -860,8 +869,8 @@ class DavinciModel {
   std::set<uint32_t> hcom_streams_;
   RuntimeParam runtime_param_;
 
-  static std::mutex tvm_bin_mutex_;  // lock for tvm maps.
-  static std::set<std::string> tvm_bin_kernel_;
+  static std::mutex tvm_bin_mutex_;
+  std::set<std::string> tvm_bin_kernel_;
 
   std::map<std::string, uint32_t> used_tbe_handle_map_;
 
@@ -884,6 +893,7 @@ class DavinciModel {
   std::map<const void *, void *> knonw_output_data_info_;
 
   vector<uint64_t> batch_size_;
+  bool is_new_model_desc_{false};
 };
 }  // namespace ge
 #endif  // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_
diff --git a/src/ge/graph/load/new_model_manager/model_manager.cc b/src/ge/graph/load/new_model_manager/model_manager.cc
index 8b17a35b..701cef1e 100644
--- a/src/ge/graph/load/new_model_manager/model_manager.cc
+++ b/src/ge/graph/load/new_model_manager/model_manager.cc
@@ -325,6 +325,12 @@ Status ModelManager::DeleteModel(uint32_t id) {
   auto it = model_map_.find(id);
   auto hybrid_model_it = hybrid_model_map_.find(id);
   if (it != model_map_.end()) {
+    uint64_t session_id = it->second->GetSessionId();
+    std::string model_key = std::to_string(session_id) + "_" + std::to_string(id);
+    auto iter_aicpu_kernel = model_aicpu_kernel_.find(model_key);
+    if (iter_aicpu_kernel != model_aicpu_kernel_.end()) {
+      (void)model_aicpu_kernel_.erase(iter_aicpu_kernel);
+    }
     (void)model_map_.erase(it);
   } else if (hybrid_model_it != hybrid_model_map_.end()) {
     (void)hybrid_model_map_.erase(hybrid_model_it);
@@ -685,11 +691,14 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
 
 Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<InputOutputDescInfo> &input_desc,
                                             vector<InputOutputDescInfo> &output_desc,
-                                            std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats) {
+                                            std::vector<uint32_t> &inputFormats, std::vector<uint32_t> &outputFormats,
+                                            bool new_model_desc) {
   std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
   GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID,
                          "GetInputOutputDescInfo Failed, Invalid Model ID %u !", model_id);
 
+  davinci_model->SetModelDescVersion(new_model_desc);
+
   return davinci_model->GetInputOutputDescInfo(input_desc, output_desc, inputFormats, outputFormats);
 }
 
@@ -820,6 +829,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
       return FAILED;
     }
     davinci_model->SetDeviceId(device_id);
+    davinci_model->SetOmName(model.om_name);
 
     /// In multi-threaded inference,  using the same session_id among multiple threads may cause some threads to fail.
     /// These session_ids come from the same model, so the values of session_id are the same.
diff --git a/src/ge/graph/load/new_model_manager/model_manager.h b/src/ge/graph/load/new_model_manager/model_manager.h
index 9a94e5c9..8e2424bf 100644
--- a/src/ge/graph/load/new_model_manager/model_manager.h
+++ b/src/ge/graph/load/new_model_manager/model_manager.h
@@ -178,7 +178,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
 
   ge::Status GetInputOutputDescInfo(const uint32_t model_id, std::vector<InputOutputDescInfo> &input_desc,
                                     std::vector<InputOutputDescInfo> &output_desc, std::vector<uint32_t> &inputFormats,
-                                    std::vector<uint32_t> &outputFormats);
+                                    std::vector<uint32_t> &outputFormats, bool new_model_desc = false);
   ///
   /// @ingroup ge
   /// @brief Get dynamic batch_info
diff --git a/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
index a7b169bf..077ae827 100644
--- a/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
+++ b/src/ge/graph/load/new_model_manager/task_info/end_graph_task_info.cc
@@ -47,7 +47,8 @@ Status EndGraphTaskInfo::Distribute() {
   GE_CHECK_NOTNULL(davinci_model_);
   auto all_dump_model = PropertiesManager::Instance().GetAllDumpModel();
   if (all_dump_model.find(ge::DUMP_ALL_MODEL) != all_dump_model.end() ||
-      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end()) {
+      all_dump_model.find(davinci_model_->Name()) != all_dump_model.end() ||
+      all_dump_model.find(davinci_model_->OmName()) != all_dump_model.end()) {
     GELOGI("Start to call rtEndGraphEx");
     rtError_t rt_ret = rtEndGraphEx(model_, stream_, kDumpFlag);
     if (rt_ret != RT_ERROR_NONE) {
diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
index 95580a15..79971529 100644
--- a/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
+++ b/src/ge/graph/load/new_model_manager/task_info/kernel_ex_task_info.cc
@@ -153,7 +153,8 @@ Status KernelExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davin
     GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(rt_ret, "rtMemcpy to input_output_addr_ error: 0x%X", rt_ret);
                     return FAILED;)
 
-    if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) {
+    if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
+                                                      op_desc->GetName())) {
       dump_flag_ = RT_KERNEL_DUMPFLAG;
       dump_args_ = input_output_addr_;
     }
diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
index 390e4e99..1f42b920 100644
--- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
+++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc
@@ -63,7 +63,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
     return ret;
   }
 
-  domi::KernelDef kernel_def = task_def.kernel();
+  const domi::KernelDef &kernel_def = task_def.kernel();
   block_dim_ = kernel_def.block_dim();
   args_size_ = kernel_def.args_size();
   // get opcontext stored in model
@@ -92,7 +92,7 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
   string session_graph_model_id;
   davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
   // get bin_file_key
-  const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
+  const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
   // new aicpu kernel(rtCpuKernelLaunch) no need to check function
   if (kernel_type_ == cce::ccKernelType::CCE_AI_CORE) {
     rtError_t rt_ret;
@@ -494,7 +494,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
   // When inferencing, stub_func_ is different from dynamic-registration to runtime, and needs to be modified.
   string session_graph_model_id;
   davinci_model_->GetUniqueId(op_desc, session_graph_model_id);
-  const char *bin_file_key = DavinciModel::GetRegisterStub(op_desc->GetName(), session_graph_model_id);
+  const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc->GetName(), session_graph_model_id);
   rtError_t rt_ret = rtQueryFunctionRegistered(const_cast<char *>(bin_file_key));
   if (rt_ret != RT_ERROR_NONE) {
     stub_func_ = const_cast<char *>(bin_file_key);
@@ -549,7 +549,8 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
     return FAILED;
   }
 
-  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) {
+  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
+                                                    op_desc->GetName())) {
     dump_flag_ = RT_KERNEL_DUMPFLAG;
     dump_args_ = static_cast<char *>(args_) + offset;
   }
@@ -818,7 +819,8 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
     return RT_FAILED;
   }
 
-  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), op_desc->GetName())) {
+  if (PropertiesManager::Instance().IsLayerNeedDump(davinci_model_->Name(), davinci_model_->OmName(),
+                                                    op_desc->GetName())) {
     dump_flag_ = RT_KERNEL_DUMPFLAG;
     dump_args_ = static_cast<char *>(args_) + sizeof(aicpu::AicpuParamHead);
   }
diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc
index dd4855b6..a6cb2f8b 100644
--- a/src/ge/graph/manager/graph_manager.cc
+++ b/src/ge/graph/manager/graph_manager.cc
@@ -396,8 +396,6 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
   if (save_ret != SUCCESS) {
     GELOGW("Fail to save cache.");
   }
-  // release rts generate context
-  RtContextUtil::GetInstance().DestroyrtContexts();
   GEEVENT("[GEPERFTRACE] GE PreRun End");
   return SUCCESS;
 }
@@ -420,6 +418,8 @@ Status GraphManager::StartForRunGraph(const GraphNodePtr &graph_node, const std:
     ret = IncreBuild(graph_node, ge_model);
     if (ret != SUCCESS) {
       ret = PreRun(graph_node, inputs, ge_root_model, session_id);
+      // release rts generate context
+      RtContextUtil::GetInstance().DestroyrtContexts();
       if (ret != SUCCESS) {
         GELOGE(ret, "PreRun Failed.");
         return ret;
@@ -2165,6 +2165,8 @@ void GraphManager::PreRunThread(GraphManager *graph_manager) {
       GeModelPtr ge_model = nullptr;
       if (graph_manager->IncreBuild(graph_node, ge_model) != SUCCESS) {
         ret = graph_manager->PreRun(graph_node, ge_inputs, ge_root_model, args.session_id);
+        // release rts generate context
+        RtContextUtil::GetInstance().DestroyrtContexts();
         if (ret != SUCCESS) {
           graph_node->SetRunFlag(false);
           ReturnError(graph_manager, args.callback, ret, "PreRun Failed, thread exit..");
diff --git a/src/ge/graph/manager/graph_var_manager.cc b/src/ge/graph/manager/graph_var_manager.cc
index 2982eb89..9334a0af 100644
--- a/src/ge/graph/manager/graph_var_manager.cc
+++ b/src/ge/graph/manager/graph_var_manager.cc
@@ -91,7 +91,7 @@ ge::Status VarResource::SaveVarAddr(const std::string &var_name, const ge::GeTen
   std::string var_key = VarKey(var_name, tensor_desc);
   GELOGD("VarResource::SaveVarAddr, var_key = %s", var_key.c_str());
   if (var_addr_mgr_map_.count(var_key) == 0) {
-    uint64_t logic_address = VarManager::Instance(0)->GetVarMemLogicBase() +
+    uint64_t logic_address = VarManager::Instance(session_id_)->GetVarMemLogicBase() +
                              reinterpret_cast<uint64_t>(reinterpret_cast<std::uintptr_t>(address));
     GELOGI("SaveVarAddr node_name %s, tensor_desc format %s, type %s.", var_name.c_str(),
            TypeUtils::FormatToSerialString(tensor_desc.GetFormat()).c_str(),
diff --git a/src/ge/graph/partition/graph_partition.cc b/src/ge/graph/partition/graph_partition.cc
index 0dff2570..50cd7e81 100644
--- a/src/ge/graph/partition/graph_partition.cc
+++ b/src/ge/graph/partition/graph_partition.cc
@@ -105,9 +105,8 @@ void ge::GraphPartitioner::SetMergedGraphId(ge::ComputeGraphPtr &output_merged_c
 
 Status ge::GraphPartitioner::RemoveNodeAndEdgeBetweenEndPld(ge::ComputeGraphPtr &output_merged_compute_graph,
                                                             const std::vector<SubGraphInfoPtr> &sub_graph_list) {
-  ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>("mergedGraph");
-  output_merged_compute_graph = new_sub_graph;
-  if ((new_sub_graph == nullptr) || (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) {
+  if ((output_merged_compute_graph == nullptr) ||
+      (MergeAllSubGraph(output_merged_compute_graph, sub_graph_list) != SUCCESS)) {
     GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: MergeAllSubGraph failed.");
     return FAILED;
   }
@@ -229,6 +228,9 @@ Status ge::GraphPartitioner::MergeSubGraph(ge::ComputeGraphPtr &output_merged_co
       return FAILED;
     }
   }
+  ComputeGraphPtr new_sub_graph = MakeShared<ComputeGraph>(original_compute_graph->GetName());
+  GE_CHECK_NOTNULL(new_sub_graph);
+  output_merged_compute_graph = new_sub_graph;
   GE_TIMESTAMP_START(MergeGraphRemoveNode);
   if (RemoveNodeAndEdgeBetweenEndPld(output_merged_compute_graph, sub_graph_list) != ge::SUCCESS) {
     GELOGE(GE_GRAPH_PARAM_NULLPTR, "[GraphPartitioner]: merging sub-graphs failed");
diff --git a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc
index a1f8b14a..3b4e4c19 100644
--- a/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc
+++ b/src/ge/graph/passes/same_transdata_breadth_fusion_pass.cc
@@ -70,6 +70,7 @@ OpDescPtr SameTransdataBreadthFusionPass::GetCastOp(const GeTensorDesc &in_desc,
   cast_op_name << "fusion_cast_" << fusion_cast_op_count++;
   auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST);
   auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op);
+  node_op.BreakConnect();
   if (cast_op == nullptr) {
     GELOGE(INTERNAL_ERROR, "new fusion cast op failed!");
     return nullptr;
diff --git a/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc b/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc
index 92ae75e6..ba4cd031 100644
--- a/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc
+++ b/src/ge/graph/passes/transop_without_reshape_fusion_pass.cc
@@ -501,6 +501,7 @@ OpDescPtr TransOpWithoutReshapeFusionPass::GetCastOp(const GeTensorDesc &cast_in
   cast_op_name << "fusion_cast_op_" << fusion_cast_op_count++;
   auto node_op = ge::OperatorFactory::CreateOperator(cast_op_name.str(), CAST);
   auto cast_op = ge::OpDescUtils::GetOpDescFromOperator(node_op);
+  node_op.BreakConnect();
   if (cast_op == nullptr) {
     GELOGE(INTERNAL_ERROR, "new cast op failed!");
     return nullptr;
diff --git a/src/ge/graph/preprocess/graph_preprocess.cc b/src/ge/graph/preprocess/graph_preprocess.cc
index 68382f52..ac26e55e 100644
--- a/src/ge/graph/preprocess/graph_preprocess.cc
+++ b/src/ge/graph/preprocess/graph_preprocess.cc
@@ -19,8 +19,6 @@
 #include <set>
 #include <string>
 #include <utility>
-#include "common/formats/format_transfers/format_transfer_fractal_nz.h"
-#include "common/formats/format_transfers/format_transfer_fractal_z.h"
 #include "common/formats/format_transfers/format_transfer_nchw_nc1hwc0.h"
 #include "common/formats/format_transfers/format_transfer_nhwc_nc1hwc0.h"
 #include "common/formats/format_transfers/format_transfer_transpose.h"
@@ -34,6 +32,7 @@
 #include "graph/common/transop_util.h"
 #include "graph/debug/ge_attr_define.h"
 #include "graph/ge_context.h"
+#include "graph/shape_refiner.h"
 #include "graph/manager/graph_var_manager.h"
 #include "graph/manager/util/rt_context_util.h"
 #include "graph/optimize/graph_optimize.h"
@@ -123,9 +122,6 @@ static std::map<std::string, ge::DataType> output_type_str_to_datatype = {
   {"UINT32", ge::DT_UINT32}, {"UINT64", ge::DT_UINT64}, {"DOUBLE", ge::DT_DOUBLE}};
 
 const char *const kMbatchSwitchnName = "mbatch-switch-name";
-const int64_t kGemmNdShapeSize = 2;
-const int64_t kGemmAlignSize32 = 32;
-const int64_t kGemmAlignSize16 = 16;
 
 OpDescPtr CreateTensorShape(const GeTensorDesc &data_tensor) {
   GeTensorPtr tensor = MakeShared<GeTensor>();
@@ -1135,114 +1131,9 @@ Status ProcessInputNC1HWC0DynShape(NodePtr &node_ptr, bool &is_dynamic_batch, No
   return SUCCESS;
 }
 
-Status ProcessGemmFractalZ(GeShape &src_shape, std::vector<int64_t> &dst_shape_vec) {
-  dst_shape_vec.clear();
-  if (src_shape.GetDims().size() != kGemmNdShapeSize) {
-    GELOGE(INTERNAL_ERROR, "gemm shape size must be 2");
-    return FAILED;
-  }
-  dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(0), kGemmAlignSize32));
-  dst_shape_vec.push_back(formats::Ceil(src_shape.GetDim(1), kGemmAlignSize16));
-  dst_shape_vec.push_back(kGemmAlignSize16);
-  dst_shape_vec.push_back(kGemmAlignSize32);
-  return SUCCESS;
-}
-Status SetInOutForGemm(GeTensorDescPtr &input, GeTensorDescPtr &output, GeShape shape, Format format) {
-  input->SetShape(shape);
-  input->SetFormat(format);
-  output->SetShape(shape);
-  output->SetFormat(format);
-  int64_t input_shape_size = 0;
-  int64_t output_shape_size = 0;
-  ge::graphStatus input_graph_status = ge::TensorUtils::GetTensorSizeInBytes(*input, input_shape_size);
-  ge::graphStatus output_graph_status = ge::TensorUtils::GetTensorMemorySizeInBytes(*output, output_shape_size);
-  if ((input_graph_status != ge::GRAPH_SUCCESS) && (output_graph_status != ge::GRAPH_SUCCESS)) {
-    GELOGE(GRAPH_FAILED, "GetTensorSize failed!");
-    return FAILED;
-  }
-  ge::TensorUtils::SetSize(*input, input_shape_size);
-  ge::TensorUtils::SetSize(*output, output_shape_size);
-  return SUCCESS;
-}
-
-Status ProcessSingleOpInput(NodePtr &node_ptr, string &single_op_input_format) {
-  ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_input_format);
-  auto op_desc = node_ptr->GetOpDesc();
-  auto data_input = op_desc->MutableInputDesc(0);
-  auto data_output = op_desc->MutableOutputDesc(0);
-  ge::Format src_format = data_input->GetFormat();
-  ge::DataType src_dt = data_input->GetDataType();
-  ge::GeShape src_shape = data_input->GetShape();
-  std::vector<int64_t> dst_shape_vec;
-  if (input_format == FORMAT_FRACTAL_NZ) {
-    formats::FormatTransferFractalNz transfer;
-    if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str());
-      return FAILED;
-    }
-    ge::GeShape dst_shape(dst_shape_vec);
-    if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_NZ) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_NZ desc failed.", op_desc->GetName().c_str());
-      return FAILED;
-    }
-  } else if (input_format == FORMAT_FRACTAL_Z) {
-    if (ProcessGemmFractalZ(src_shape, dst_shape_vec) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Op [%s] trans FRACTAL_Z Shape failed.", op_desc->GetName().c_str());
-      return FAILED;
-    }
-    ge::GeShape dst_shape(dst_shape_vec);
-    if (SetInOutForGemm(data_input, data_output, dst_shape, FORMAT_FRACTAL_Z) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Op [%s] set FRACTAL_Z desc failed.", op_desc->GetName().c_str());
-      return FAILED;
-    }
-  }
-  // Gemm shape and format should be set at this stage, temporary solution.
-  auto out_anchor = node_ptr->GetOutDataAnchor(0);
-  for (auto &in_anchor : out_anchor->GetPeerInDataAnchors()) {
-    GE_CHECK_NOTNULL(in_anchor);
-    auto index = static_cast<uint32_t>(in_anchor->GetIdx());
-    ge::NodePtr next_node = in_anchor->GetOwnerNode();
-    GE_CHECK_NOTNULL(next_node);
-    auto next_op_desc = next_node->GetOpDesc();
-    GE_CHECK_NOTNULL(next_op_desc);
-    auto input_desc = next_op_desc->MutableInputDesc(index);
-    GE_CHECK_NOTNULL(input_desc);
-    input_desc->SetFormat(input_format);
-    input_desc->SetShape(data_output->GetShape());
-  }
-  return SUCCESS;
-}
-
-Status ProcessSingleOpOutput(OpDescPtr &op_desc, string &single_op_output_format) {
-  ge::Format input_format = TypeUtils::SerialStringToFormat(single_op_output_format);
-  auto data_input = op_desc->MutableInputDesc(0);
-  ge::Format src_format = data_input->GetFormat();
-  ge::DataType src_dt = data_input->GetDataType();
-  ge::GeShape src_shape = data_input->GetShape();
-  std::vector<int64_t> dst_shape_vec;
-  if (input_format == FORMAT_FRACTAL_NZ) {
-    formats::FormatTransferFractalNz transfer;
-    if (transfer.TransShape(src_format, src_shape.GetDims(), src_dt, FORMAT_FRACTAL_NZ, dst_shape_vec) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Op [%s] trans FZ Shape failed.", op_desc->GetName().c_str());
-      return FAILED;
-    }
-    ge::GeShape dst_shape(dst_shape_vec);
-    data_input->SetShape(dst_shape);
-    data_input->SetFormat(FORMAT_FRACTAL_NZ);
-  }
-  return SUCCESS;
-}
-
-Status ProcessDataNodeDynShape(NodePtr &node_ptr, bool &is_single_op) {
+Status ProcessDataNodeDynShape(NodePtr &node_ptr) {
   auto op_desc = node_ptr->GetOpDesc();
   GE_CHECK_NOTNULL(op_desc);
-  std::string single_op_input_format;
-  if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_input_format", single_op_input_format))) {
-    if (ProcessSingleOpInput(node_ptr, single_op_input_format) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Process single op input [%s] failed.", node_ptr->GetName().c_str());
-      return FAILED;
-    }
-  }
   bool set_fp16 = false;
   if (!ge::AttrUtils::GetBool(node_ptr->GetOpDesc(), "input_fp16", set_fp16) || !set_fp16) {
     return SUCCESS;
@@ -1375,16 +1266,9 @@ bool NeedUpdateOutputByOutputTypeParm(std::string &output_type, NodePtr &src_nod
   return false;
 }
 
-Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type, bool &is_single_op) {
+Status ProcessNetoutputNodeDynShape(NodePtr &node, std::string &output_type) {
   auto op_desc = node->GetOpDesc();
   GE_CHECK_NOTNULL(op_desc);
-  std::string single_op_output_format;
-  if (is_single_op && (ge::AttrUtils::GetStr(op_desc, "_single_output_format", single_op_output_format))) {
-    if (ProcessSingleOpOutput(op_desc, single_op_output_format) != SUCCESS) {
-      GELOGE(INTERNAL_ERROR, "Process single op output [%s] failed.", node->GetName().c_str());
-      return FAILED;
-    }
-  }
   ge::DataType output_data_type = ge::DT_FLOAT;
 
   for (const auto &in_anchor : node->GetAllInDataAnchors()) {
@@ -1717,7 +1601,8 @@ Status GraphPrepare::UpdateInput(const std::vector<GeTensor> &user_input) {
       auto format = desc.GetFormat();
       auto origin_format = desc.GetOriginFormat();
       bool is_internal = TypeUtils::IsInternalFormat(format) || TypeUtils::IsInternalFormat(origin_format);
-      if (is_internal) {
+      bool need_check_internal_format = (!options_.is_single_op) && is_internal;
+      if (need_check_internal_format) {
         GELOGE(PARAM_INVALID, "Input format %s or origin_format %s is not support.",
                TypeUtils::FormatToSerialString(format).c_str(), TypeUtils::FormatToSerialString(origin_format).c_str());
         return FAILED;
@@ -2164,6 +2049,7 @@ Status GraphPrepare::GenerateInfershapeGraph(ConstGraphPtr graph) {
     GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret);
     return ret;
   }
+  ShapeRefiner::ClearContextMap();
   return SUCCESS;
 }
 
@@ -2389,6 +2275,7 @@ Status GraphPrepare::InferShapeForPreprocess() {
       }
     }
   }
+  ShapeRefiner::ClearContextMap();
   if (ret != SUCCESS) {
     GELOGE(ret, "Run ge_passes infershape for preprocess failed, ret:%u.", ret);
     return ret;
@@ -2821,14 +2708,14 @@ Status GraphPrepare::UpdateInputOutputByOptions() {
     }
 
     if (node_ptr->GetType() == DATA) {
-      if (ProcessDataNodeDynShape(node_ptr, options_.is_single_op) != SUCCESS) {
+      if (ProcessDataNodeDynShape(node_ptr) != SUCCESS) {
         GELOGE(INTERNAL_ERROR, "Process data node failed");
         return FAILED;
       }
     }
 
     if (node_ptr->GetType() == ge::NETOUTPUT) {
-      if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype, options_.is_single_op) != SUCCESS) {
+      if (ProcessNetoutputNodeDynShape(node_ptr, options_.output_datatype) != SUCCESS) {
         GELOGE(INTERNAL_ERROR, "Process netoutput node failed");
         return FAILED;
       }
diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc
index 22128394..f35b6d3a 100644
--- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc
+++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc
@@ -389,8 +389,8 @@ Status AippOp::SetDefaultParams() {
     GELOGI("parse aipp params:input_format:%s, csc_switch:%d.",
            domi::AippOpParams::InputFormat_Name(aipp_params_->input_format()).c_str(), aipp_params_->csc_switch());
 
-    GELOGI("parse aipp params:mean_chn_0:%d, mean_chn_1:%d, mean_chn_2:%d.", aipp_params_->mean_chn_0(),
-           aipp_params_->mean_chn_1(), aipp_params_->mean_chn_2());
+    GELOGI("parse aipp params:mean_chn_0:%d, mean_chn_1:%d, mean_chn_2:%d, mean_chn_3:%d.", aipp_params_->mean_chn_0(),
+           aipp_params_->mean_chn_1(), aipp_params_->mean_chn_2(), aipp_params_->mean_chn_3());
 
     GELOGI("parse aipp params:min_chn_0:%f, min_chn_1:%f, min_chn_2:%f.", aipp_params_->min_chn_0(),
            aipp_params_->min_chn_1(), aipp_params_->min_chn_2());
diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
index 49f4d3dc..5fe19869 100644
--- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
+++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.cc
@@ -40,6 +40,23 @@ namespace ge {
 namespace {
 const char *const kMbatchSwitchnName = "mbatch-switch-name";
 }  // namespace
+static void ConvertShape2Nhwc(Format &format, vector<int64_t> &shape_vec) {
+  if ((format == FORMAT_NHWC) || (shape_vec.size() != static_cast<size_t>(NORMAL_TENSOR_SIZE))) {
+    return;
+  }
+  if (format != FORMAT_NCHW) {
+    GELOGW("The format is not NCHW, current format is %s", TypeUtils::FormatToSerialString(format).c_str());
+    return;
+  }
+  vector<int64_t> shape_vec_tmp;
+  shape_vec.swap(shape_vec_tmp);
+  shape_vec.push_back(shape_vec_tmp[NCHW_DIM_N]);
+  shape_vec.push_back(shape_vec_tmp[NCHW_DIM_H]);
+  shape_vec.push_back(shape_vec_tmp[NCHW_DIM_W]);
+  shape_vec.push_back(shape_vec_tmp[NCHW_DIM_C]);
+  return;
+}
+
 Status InsertNewOpUtil::Init() {
   insert_op_conf_.reset((new (std::nothrow) domi::InsertNewOps()));
   GE_CHECK_NOTNULL(insert_op_conf_);
@@ -223,11 +240,13 @@ Status InsertNewOpUtil::UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &s
     GELOGE(FAILED, "UpdateOutputDesc fail, graph_ret:%d", graph_ret);
     return FAILED;
   }
-  GELOGI("Get size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str());
+  GELOGI("Get input size [%ld] from aipp [%s].", size, aipp_op_desc->GetName().c_str());
   if (size == 0) {
     GELOGE(FAILED, "Can not get size from aipp [%s]", aipp_op_desc->GetName().c_str());
     return FAILED;
   }
+  // Save the input size of aipp node, which will be used in dumping aipp node or fused aipp node
+  (void)AttrUtils::SetInt(aipp_input, ATTR_NAME_INPUT_ORIGIN_SIZE, size);
 
   auto in_data_anchor = node->GetInDataAnchor(0);
   GE_CHECK_NOTNULL(in_data_anchor);
@@ -305,6 +324,8 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt
 
   auto data_opdesc = data->GetOpDesc();
   GE_CHECK_NOTNULL(data_opdesc);
+  Format old_format = data_opdesc->MutableOutputDesc(0)->GetFormat();
+
   auto ret = data_opdesc->UpdateOutputDesc(0, *input_desc);
   if (ret != GRAPH_SUCCESS) {
     GELOGE(INTERNAL_ERROR, "Failed to update data %s output using switchn %s", data->GetName().c_str(),
@@ -317,9 +338,34 @@ Status InsertNewOpUtil::UpdateDataBySwitchN(const NodePtr &switchn, const NodePt
            switchn->GetName().c_str());
     return INTERNAL_ERROR;
   }
+  // Update attr _mbatch_origin_input_dims for data when it is linked to aipp
+  UpdateMultiBatchInputDims(data_opdesc, old_format);
   return SUCCESS;
 }
 
+void InsertNewOpUtil::UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format) {
+  if (!data_opdesc->HasAttr(ATTR_MBATCH_ORIGIN_INPUT_DIMS)) {
+    GELOGW("Failed to acquire _mbatch_origin_input_dims attr from node [%s]", data_opdesc->GetName().c_str());
+    return;
+  }
+  auto new_data_dims = data_opdesc->GetOutputDesc(0).GetShape().GetDims();
+  vector<int64_t> origin_input_dims;
+  (void)AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims);
+  // Convert origin_input_dims to NHWC because data format is set to NHWC when it is linked to aipp.
+  ConvertShape2Nhwc(old_format, origin_input_dims);
+  if (new_data_dims.size() != origin_input_dims.size()) {
+    return;
+  }
+  for (size_t i = 0; i < origin_input_dims.size(); ++i) {
+    // Need to update shape when aipp has crop function because H,W is different, ignore -1.
+    if (origin_input_dims[i] > 0) {
+      origin_input_dims[i] = new_data_dims[i];
+    }
+  }
+  (void)AttrUtils::SetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims);
+  return;
+}
+
 Status InsertNewOpUtil::GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map) {
   GELOGI("Start to get data and next node %s.", node->GetName().c_str());
   OpDescPtr data_op = node->GetOpDesc();
@@ -420,15 +466,18 @@ Status InsertNewOpUtil::RecordAIPPInfoToData(const ComputeGraphPtr &graph) {
       GetInputOutputInfo(data_node, aipp_it, input, output);
       input_dims.emplace_back(input);
       output_dims.emplace_back(output);
+
+      // When static aipp is set, need to get the model input dims which processed by aipp
+      GE_RETURN_IF_ERROR(SetModelInputDims(data_node, aipp_it));
     }
 
     if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_INPUTS, input_dims)) {
-      GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_INPUTS.c_str());
+      GELOGE(FAILED, "SetListStr of %s failed.", ATTR_NAME_AIPP_INPUTS.c_str());
       return FAILED;
     }
 
     if (!AttrUtils::SetListStr(data_node->GetOpDesc(), ATTR_NAME_AIPP_OUTPUTS, output_dims)) {
-      GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_AIPP_OUTPUTS.c_str());
+      GELOGE(FAILED, "SetListStr of %s failed.", ATTR_NAME_AIPP_OUTPUTS.c_str());
       return FAILED;
     }
   }
@@ -473,4 +522,41 @@ Status InsertNewOpUtil::GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_nod
          data_node->GetName().c_str(), aipp_node->GetName().c_str(), input.c_str(), output.c_str());
   return SUCCESS;
 }
+
+Status InsertNewOpUtil::SetModelInputDims(NodePtr &data_node, NodePtr &aipp_node) {
+  GE_CHECK_NOTNULL(data_node);
+  GE_CHECK_NOTNULL(aipp_node);
+  OpDescPtr data_opdesc = data_node->GetOpDesc();
+  GE_CHECK_NOTNULL(data_opdesc);
+  OpDescPtr aipp_opdesc = aipp_node->GetOpDesc();
+  GE_CHECK_NOTNULL(aipp_opdesc);
+
+  // In dynamic bacth/hw scenario, the new model input dims only need be set once
+  if (data_node->GetOpDesc()->HasAttr(ATTR_NAME_INPUT_DIMS)) {
+    GELOGD("Data %s already has attribute %s", data_node->GetOpDesc()->GetName().c_str(), ATTR_NAME_INPUT_DIMS.c_str());
+    return SUCCESS;
+  }
+  vector<int64_t> model_input_dims;
+  vector<int64_t> origin_input_dims;
+  if (AttrUtils::GetListInt(aipp_opdesc, ATTR_NAME_INPUT_DIMS, model_input_dims) && !model_input_dims.empty()) {
+    // When dynamic bacth/hw is set, N or HW need to be set to -1
+    if (AttrUtils::GetListInt(data_opdesc, ATTR_MBATCH_ORIGIN_INPUT_DIMS, origin_input_dims) &&
+        !origin_input_dims.empty()) {
+      GELOGI("In dynamic bacth/hw scenario, N or HW need to be set to -1. model_input_dims: %s, origin_input_dims: %s",
+             formats::JoinToString(model_input_dims).c_str(), formats::JoinToString(origin_input_dims).c_str());
+      for (size_t i = 0; i < origin_input_dims.size(); ++i) {
+        // N or HW need to be set to -1
+        if (origin_input_dims[i] < 0) {
+          model_input_dims[i] = origin_input_dims[i];
+        }
+      }
+    }
+    GELOGD("After set H/W to -1, the model input dims: %s.", formats::JoinToString(model_input_dims).c_str());
+    if (!AttrUtils::SetListInt(data_opdesc, ATTR_NAME_INPUT_DIMS, model_input_dims)) {
+      GELOGE(FAILED, "SetListInt of %s failed.", ATTR_NAME_INPUT_DIMS.c_str());
+      return FAILED;
+    }
+  }
+  return SUCCESS;
+}
 }  // namespace ge
diff --git a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h
index 8dad2012..93a96ca2 100644
--- a/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h
+++ b/src/ge/graph/preprocess/insert_op/util_insert_aipp_op.h
@@ -61,11 +61,13 @@ class InsertNewOpUtil {
 
   std::unique_ptr<domi::InsertNewOps> insert_op_conf_;
 
+  void UpdateMultiBatchInputDims(const OpDescPtr &data_opdesc, Format &old_format);
   Status UpdatePrevNodeByAipp(NodePtr &node, std::set<NodePtr> &switchns);
   Status UpdateDataBySwitchN(const NodePtr &switchn, const NodePtr &data);
   Status GetDataRelatedNode(NodePtr &node, std::map<NodePtr, std::set<NodePtr>> &data_next_node_map);
   Status GetAllAipps(const NodePtr &node, std::vector<NodePtr> &aipps);
   Status GetInputOutputInfo(NodePtr &data_node, NodePtr &aipp_node, std::string &input, std::string &output);
+  Status SetModelInputDims(NodePtr &data_node, NodePtr &aipp_node);
 };
 }  // namespace ge
 
diff --git a/src/ge/graph/preprocess/multi_batch_copy_graph.cc b/src/ge/graph/preprocess/multi_batch_copy_graph.cc
index e063398f..fbe935ec 100644
--- a/src/ge/graph/preprocess/multi_batch_copy_graph.cc
+++ b/src/ge/graph/preprocess/multi_batch_copy_graph.cc
@@ -44,6 +44,7 @@ const int kSwitchNPredIndex = 1;
 const int kDataOutIndex = 0;
 const int kDataInIndex = 0;
 const int kMergeDataOutIndex = 0;
+const int kStaticOutput = -1;
 const size_t kMaxShapesCount = 100;
 const size_t kMinShapesCount = 2;
 
@@ -947,15 +948,18 @@ Status GetDynamicOutputShape(ComputeGraphPtr &graph) {
     GELOGE(PARAM_INVALID, "Graph is null ,para is invalid");
     return PARAM_INVALID;
   }
-  for (auto &node : graph->GetAllNodes()) {
+  for (auto &node : graph->GetDirectNode()) {
     if (node->GetType() == NETOUTPUT) {
       auto netoutput_desc = node->GetOpDesc();
       auto inputnode_to_netoutput = node->GetInAllNodes();
+      std::vector<size_t> dynamic_output_index;
       for (size_t j = 0; j < inputnode_to_netoutput.size(); j++) {
         bool ret = false;
         (void)AttrUtils::GetBool(inputnode_to_netoutput.at(j)->GetOpDesc(), ATTR_INSERT_BY_MBATCH, ret);
         if (inputnode_to_netoutput.at(j)->GetType() == MERGE && ret) {
-          GELOGI("Find the merge node %s with mbatch attr", inputnode_to_netoutput.at(j)->GetName().c_str());
+          GELOGI("Find the merge node %s with mbatch attr and the index is %zu",
+                 inputnode_to_netoutput.at(j)->GetName().c_str(), j);
+          dynamic_output_index.emplace_back(j);
           for (size_t i = 0; i < inputnode_to_netoutput.at(j)->GetInNodes().size(); i++) {
             auto input_desc = inputnode_to_netoutput.at(j)->GetOpDesc();
             auto input_tensor_desc = input_desc->GetInputDesc(i);
@@ -967,6 +971,17 @@ Status GetDynamicOutputShape(ComputeGraphPtr &graph) {
         }
       }
       if (dynamic_output_dims.size() > 0) {
+        for (size_t k = 0; k < inputnode_to_netoutput.size(); k++) {
+          auto it = std::find(dynamic_output_index.begin(), dynamic_output_index.end(), k);
+          if (it != dynamic_output_index.end()) {
+            continue;
+          }
+          auto tensor_desc = netoutput_desc->GetInputDesc(k);
+          auto shape = tensor_desc.GetShape().ToString();
+          std::string static_output_shape = std::to_string(kStaticOutput) + "," + std::to_string(k) + "," + shape;
+          GELOGI("The static output shape msg is %s", static_output_shape.c_str());
+          dynamic_output_dims.emplace_back(static_output_shape);
+        }
         if (!AttrUtils::SetListStr(netoutput_desc, ATTR_NAME_DYNAMIC_OUTPUT_DIMS, dynamic_output_dims)) {
           GELOGE(FAILED, "Set dynamic output dims attr failed");
           return FAILED;
diff --git a/src/ge/host_kernels/concat_v2_kernel.cc b/src/ge/host_kernels/concat_v2_kernel.cc
index 81127302..c46b4277 100644
--- a/src/ge/host_kernels/concat_v2_kernel.cc
+++ b/src/ge/host_kernels/concat_v2_kernel.cc
@@ -31,6 +31,7 @@
 namespace ge {
 namespace {
 const size_t kConcatV2InputNum = 3;
+const int kSupportEmptyTensorRank = 1;
 const std::set<DataType> concatv2_supported_type = {DT_INT32, DT_FLOAT};
 
 template <typename T>
@@ -39,7 +40,12 @@ void GetOutputData(std::vector<T> &y_data, int64_t loop, size_t &input_size,
   for (int64_t i = 0; i < loop; i++) {
     for (size_t k = 0; k < input_size; k++) {
       GeShape datak_shape = input.at(k)->GetTensorDesc().GetShape();
-      const T *datak = reinterpret_cast<const T *>(input.at(k)->GetData().data());
+      auto buffer = input.at(k)->GetData();
+      const T *datak = reinterpret_cast<const T *>(buffer.data());
+      if (datak == nullptr || buffer.size() == 0) {
+        GELOGW("input[%zu] is with no data", k);
+        continue;
+      }
       int64_t gapk = datak_shape.GetShapeSize() / loop;  // [2,3] is 6/loop
       for (int64_t j = 0; j < gapk; j++) {
         y_data.push_back(datak[j + gapk * i]);
@@ -63,7 +69,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
     return PARAM_INVALID;
   }
   int tidx = -1;
-  Status ret = ConcatV2PreCompute(input, tidx);
+  ConstGeTensorPtr tensor = nullptr;
+  Status ret = ConcatV2PreCompute(input, tidx, tensor);
   if (ret != SUCCESS) {
     return ret;
   }
@@ -71,9 +78,8 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
   size_t input_size = input.size();  // N + 1
   input_size--;                      // N
 
-  ConstGeTensorPtr tensor0 = input.at(0);
-  GE_CHECK_NOTNULL(tensor0);
-  DataType data_type = tensor0->GetTensorDesc().GetDataType();
+  GE_CHECK_NOTNULL(tensor);
+  DataType data_type = tensor->GetTensorDesc().GetDataType();
   uint32_t length = 0;
   if (!TypeUtils::GetDataTypeLength(data_type, length)) {
     GELOGW("Can't GetDataTypeLength of data_type: %s", TypeUtils::DataTypeToSerialString(data_type).c_str());
@@ -91,7 +97,7 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
     return MEMALLOC_FAILED;
   }
 
-  GeShape data0_shape = tensor0->GetTensorDesc().GetShape();
+  GeShape data0_shape = tensor->GetTensorDesc().GetShape();
   int64_t loop = 1;
   for (int i = 0; i < tidx; i++) {
     loop *= data0_shape.GetDim(i);
@@ -110,29 +116,33 @@ Status ConcatV2Kernel::Compute(const ge::OpDescPtr op_desc_ptr, const vector<ge:
   return SUCCESS;
 }
 
-Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx) {
+Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx,
+                                          ConstGeTensorPtr &tensor) {
   size_t input_size = input.size();
   // N >= 2 and N + 1 >= 3
   if (input_size < kConcatV2InputNum) {
     GELOGI("The number of input for ConcatV2 must not be less than %zu.", kConcatV2InputNum);
     return NOT_CHANGED;
   }
-
+  bool has_empty_tensor = false;
+  input_size--;
   for (size_t i = 0; i < input_size; i++) {
     if (input[i] == nullptr) {
       GELOGI("Input%zu must not be null.", i);
       return NOT_CHANGED;
     }
     if (input.at(i)->GetData().size() == 0) {
-      GELOGI("Check data size fail. input%zu size is 0.", i);
-      return NOT_CHANGED;
+      GELOGW("input[%zu] is with no data.", i);
+      has_empty_tensor = true;
+      continue;
+    }
+    if (tensor == nullptr) {
+      tensor = input.at(i);  // get first valid tensor with data
     }
   }
 
-  input_size--;
-  ConstGeTensorPtr tensor0 = input.at(0);
-  GE_CHECK_NOTNULL(tensor0);
-  DataType data_type = tensor0->GetTensorDesc().GetDataType();
+  GE_CHECK_NOTNULL(tensor);
+  DataType data_type = tensor->GetTensorDesc().GetDataType();
   for (size_t i = 1; i < input_size; i++) {
     if (data_type != input.at(i)->GetTensorDesc().GetDataType()) {
       GELOGI("Data type of N inputs for ConcatV2 not the same, check input %zu failed.", i);
@@ -149,13 +159,18 @@ Status ConcatV2Kernel::ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &i
   ConstGeTensorPtr tensor_axis = input.at(input_size);
   GE_CHECK_NOTNULL(tensor_axis);
   const int *axis = reinterpret_cast<const int *>(tensor_axis->GetData().data());
-  tidx = axis[0];                                                                // [-rank(values), rank(values))
-  int dims = static_cast<int>(tensor0->GetTensorDesc().GetShape().GetDimNum());  // rank
+  GE_CHECK_NOTNULL(axis);
+  tidx = axis[0];                                                               // [-rank(values), rank(values))
+  int rank = static_cast<int>(tensor->GetTensorDesc().GetShape().GetDimNum());  // rank
   if (tidx < 0) {
-    tidx += dims;
+    tidx += rank;
   }
-  if (tidx < 0 || tidx > dims) {
-    GELOGI("ConcatV2 tidx not legal.");
+  // 1. tidx should in range [0,rank)
+  // 2. empty tensor only support case: [n],[m],[]
+  // case: [[],[]] ,[[],[]] ,[] or other case when rank >=2 is not supported
+  if (tidx < 0 || tidx >= rank || (has_empty_tensor && rank > kSupportEmptyTensorRank)) {
+    GELOGW("ConcatV2 info: tidx[%d]_rank[%d]_has_empty_tensor[bool:%d] cannot be supported, skip fold.", tidx, rank,
+           has_empty_tensor);
     return NOT_CHANGED;
   }
 
diff --git a/src/ge/host_kernels/concat_v2_kernel.h b/src/ge/host_kernels/concat_v2_kernel.h
index c1514c80..353b7ed5 100644
--- a/src/ge/host_kernels/concat_v2_kernel.h
+++ b/src/ge/host_kernels/concat_v2_kernel.h
@@ -28,7 +28,7 @@ class ConcatV2Kernel : public Kernel {
                  std::vector<GeTensorPtr> &v_output) override;
 
  private:
-  Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx);
+  Status ConcatV2PreCompute(const std::vector<ConstGeTensorPtr> &input, int &tidx, ConstGeTensorPtr &tensor);
 };
 }  // namespace ge
 
diff --git a/src/ge/init/gelib.cc b/src/ge/init/gelib.cc
index 5fcb0cd7..fd54c8c9 100644
--- a/src/ge/init/gelib.cc
+++ b/src/ge/init/gelib.cc
@@ -46,6 +46,8 @@ namespace ge {
 namespace {
 const int kDecimal = 10;
 const int kSocVersionLen = 50;
+const int kDefaultDeviceIdForTrain = 0;
+const int kDefaultDeviceIdForInfer = -1;
 }  // namespace
 static std::shared_ptr<GELib> instancePtr_ = nullptr;
 
@@ -194,8 +196,12 @@ Status GELib::SystemInitialize(const map<string, string> &options) {
   // In train and infer, profiling is always needed.
   InitOptions(options);
   InitProfiling(this->options_);
-
-  if (is_train_mode_) {
+  // 1.`is_train_mode_` means case: train
+  // 2.`(!is_train_mode_) && (options_.device_id != kDefaultDeviceIdForInfer)` means case: online infer
+  // these two case need call `InitSystemWithOptions->rtGetDeviceIndexByPhyId`
+  // to convert phy device id to logical device id
+  // note:rtGetDeviceIndexByPhyId return `0` logical id when input phy device id is `0`
+  if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) {
     status = InitSystemWithOptions(this->options_);
   } else {
     status = InitSystemWithoutOptions();
@@ -237,7 +243,7 @@ void GELib::InitOptions(const map<string, string> &options) {
   if (iter != options.end()) {
     this->options_.session_id = std::strtoll(iter->second.c_str(), nullptr, kDecimal);
   }
-  this->options_.device_id = 0;
+  this->options_.device_id = is_train_mode_ ? kDefaultDeviceIdForTrain : kDefaultDeviceIdForInfer;
   iter = options.find(OPTION_EXEC_DEVICE_ID);
   if (iter != options.end()) {
     this->options_.device_id = static_cast<int32_t>(std::strtol(iter->second.c_str(), nullptr, kDecimal));
@@ -289,7 +295,8 @@ void GELib::InitOptions(const map<string, string> &options) {
 }
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOptions(Options &options) {
-  GELOGI("Training init GELib. session Id:%ld, device id :%d ", options.session_id, options.device_id);
+  std::string mode = is_train_mode_ ? "Training" : "Online infer";
+  GELOGI("%s init GELib. session Id:%ld, device id :%d ", mode.c_str(), options.session_id, options.device_id);
   GEEVENT("System init with options begin, job id %s", options.job_id.c_str());
   std::lock_guard<std::mutex> lock(status_mutex_);
   GE_IF_BOOL_EXEC(is_system_inited && !is_shutdown,
@@ -329,13 +336,14 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status GELib::InitSystemWithOpt
   is_system_inited = true;
   is_shutdown = false;
 
-  GELOGI("Training init GELib success.");
+  GELOGI("%s init GELib success.", mode.c_str());
 
   return SUCCESS;
 }
 
 Status GELib::SystemShutdownWithOptions(const Options &options) {
-  GELOGI("Training finalize GELib begin.");
+  std::string mode = is_train_mode_ ? "Training" : "Online infer";
+  GELOGI("%s finalize GELib begin.", mode.c_str());
 
   std::lock_guard<std::mutex> lock(status_mutex_);
   GE_IF_BOOL_EXEC(is_shutdown || !is_system_inited,
@@ -353,8 +361,7 @@ Status GELib::SystemShutdownWithOptions(const Options &options) {
   is_system_inited = false;
   is_shutdown = true;
 
-  GELOGI("Training finalize GELib success.");
-
+  GELOGI("%s finalize GELib success.", mode.c_str());
   return SUCCESS;
 }
 
@@ -424,7 +431,7 @@ Status GELib::Finalize() {
   // Shut down profiling
   ShutDownProfiling();
 
-  if (is_train_mode_) {
+  if (is_train_mode_ || (options_.device_id != kDefaultDeviceIdForInfer)) {
     GELOGI("System ShutDown.");
     mid_state = SystemShutdownWithOptions(this->options_);
     if (mid_state != SUCCESS) {
diff --git a/src/ge/offline/main.cc b/src/ge/offline/main.cc
index 27309c1a..e1e55dfe 100644
--- a/src/ge/offline/main.cc
+++ b/src/ge/offline/main.cc
@@ -39,6 +39,7 @@
 #include "ir_build/atc_ir_common.h"
 #include "omg/omg.h"
 #include "omg/parser/parser_factory.h"
+#include "omg/parser/parser_inner_ctx.h"
 #include "parser/common/register_tbe.h"
 #include "register/op_registry.h"
 #include "single_op_parser.h"
@@ -178,8 +179,6 @@ DEFINE_string(compress_weight_conf, "", "Optional; the config file to compress w
 
 DEFINE_string(enable_single_stream, "", "Optional; enable single stream. true: enable; false(default): disable");
 
-DEFINE_string(quant_optimize, "true", "Optional; enable quant optimize. true: enable; false(default): disable");
-
 DEFINE_string(log, "default", "Optional; generate atc log. Support debug, info, warning, error, null");
 
 DEFINE_string(dump_mode, "0", "Optional; generate infershape json,only support 1 , 0.");
@@ -203,10 +202,7 @@ class GFlagUtils {
       "arguments explain:\n"
       "  --model             Model file\n"
       "  --singleop          Single op definition file. atc will generate offline "
-      "model(s) for single op if --singleop is set. \n"
-      "                      Note: Only output, soc_verion, core_type, aicore_num, auto_tune_mode, precision_mode, "
-      "op_select_implmode, enable_small_channel, enable_compress_weight, compress_weight_conf "
-      "enable_single_stream and log are valid in this mode \n"
+      "model(s) for single op if --singleop is set.\n"
       "  --weight            Weight file. Required when framework is Caffe\n"
       "  --framework         Framework type(0:Caffe; 1:MindSpore; 3:Tensorflow)\n"
       "  --output            Output file path&name(needn't suffix, will add "
@@ -253,6 +249,9 @@ class GFlagUtils {
       "  --op_select_implmode    Set op select implmode. Support high_precision, high_performance."
       "default: high_performance\n"
       "disable\n"
+      "  --optypelist_for_implmode    Appoint which op to use op_select_implmode, used with op_select_implmode ."
+      "Separate multiple nodes with commas (,). Use double quotation marks (\") to enclose each argument."
+      "E.g.: \"node_name1,node_name2\"\n"
       "  --head_stream       Add head stream. 0(default): disable; 1: enable\n"
       "  --soc_version       The soc version. E.g.: \"Ascend310\"\n"
       "  --core_type         Set core type AiCore or VectorCore. VectorCore: use vector core. "
@@ -270,8 +269,7 @@ class GFlagUtils {
       "Use double quotation marks (\") to enclose each argument."
       "E.g: \"imagesize1_height,imagesize1_width;imagesize2_height,imagesize2_width\"\n"
       "  --auto_tune_mode    Set tune mode. E.g.: \"GA,RL\", support configure multiple, spit by ,\n"
-      "  --enable_single_stream    Enable single stream. true: enable; false(default): disable\n"
-      "  --quant_optimize Enable quant optimize. true(default): enable; false: disable\n");
+      "  --enable_single_stream    Enable single stream. true: enable; false(default): disable\n");
 
     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
     // Using gflags to analyze input parameters
@@ -656,13 +654,36 @@ void LoadCustomOpLib() {
 
   std::vector<OpRegistrationData> registrationDatas = OpRegistry::Instance()->registrationDatas;
   for (OpRegistrationData reg_data : registrationDatas) {
-    bool ret = ge::OpRegistrationTbe::Instance()->Finalize(reg_data);
-    if (ret) {
-      OpRegistry::Instance()->Register(reg_data);
+    if (reg_data.GetFrameworkType() == static_cast<domi::FrameworkType>(FLAGS_framework)) {
+      bool ret = ge::OpRegistrationTbe::Instance()->Finalize(reg_data);
+      if (ret) {
+        (void)OpRegistry::Instance()->Register(reg_data);
+      }
     }
   }
 }
 
+void SaveCustomCaffeProtoPath() {
+  GELOGI("Enter save custom caffe proto path.");
+  string customop_path;
+
+  const char *path_env = std::getenv("ASCEND_OPP_PATH");
+  if (path_env != nullptr) {
+    std::string path = path_env;
+    customop_path = path + "/framework/custom/caffe/";
+    GELOGI("Get custom proto path from env : %s", path_env);
+    ge::GetParserContext().custom_proto_path = customop_path;
+    return;
+  }
+  std::string path_base = ge::GELib::GetPath();
+  GELOGI("path_base is %s", path_base.c_str());
+  path_base = path_base.substr(0, path_base.rfind('/'));
+  path_base = path_base.substr(0, path_base.rfind('/') + 1);
+  customop_path = path_base + "ops/framework/custom/caffe/";
+  ge::GetParserContext().custom_proto_path = customop_path;
+  return;
+}
+
 #endif
 
 Status CreateInputsForInference(const ge::Graph &graph, vector<ge::GeTensor> &inputs) {
@@ -850,6 +871,7 @@ domi::Status GenerateModel(std::map<string, string> &options, std::string output
     atc_params.insert(std::pair<string, string>("is_output_adjust_hw_layout", FLAGS_is_output_adjust_hw_layout));
     atc_params.insert(std::pair<string, string>("compress_weight_conf", FLAGS_compress_weight_conf));
     atc_params.insert(std::pair<string, string>(string(ge::OUTPUT_DATATYPE), FLAGS_output_type));
+    atc_params.insert(std::pair<string, string>("output", output));
 
     Status ret =
       ParseGraph(graph, atc_params, FLAGS_model.c_str(), FLAGS_weight.c_str(), (domi::FrameworkType)FLAGS_framework,
@@ -982,6 +1004,8 @@ domi::Status GenerateOmModel() {
   // Load custom operator Library
   LoadCustomOpLib();
 
+  SaveCustomCaffeProtoPath();
+
   ret = ge::CheckCustomAiCpuOpLib();
 
   GE_CHK_BOOL_EXEC(ret == domi::SUCCESS, return domi::FAILED, "check custom aicpu run so failed!");
@@ -1043,8 +1067,6 @@ domi::Status GenerateOmModel() {
 
   options.insert(std::pair<string, string>(string(ge::ENABLE_SINGLE_STREAM), FLAGS_enable_single_stream));
 
-  options.insert(std::pair<string, string>(string(ge::QUANT_OPTIMIZE), FLAGS_quant_optimize));
-
   SetDynamicBatchSizeOrImagesizeOptions();
 
   if (!FLAGS_save_original_model.empty()) {
diff --git a/src/ge/offline/single_op_parser.cc b/src/ge/offline/single_op_parser.cc
index 067d39e2..4d589565 100644
--- a/src/ge/offline/single_op_parser.cc
+++ b/src/ge/offline/single_op_parser.cc
@@ -273,10 +273,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single
     } else {
       op_desc->AddInputDesc(desc.name, ge_tensor_desc);
     }
-    if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) {
-      ge_tensor_desc.SetFormat(FORMAT_ND);
-      ge_tensor_desc.SetOriginFormat(FORMAT_ND);
-    }
     build_param.inputs.emplace_back(ge_tensor_desc);
   }
 
@@ -292,10 +288,6 @@ Status SingleOpParser::ConvertToBuildParam(int index, const SingleOpDesc &single
     TensorUtils::SetInputTensor(ge_tensor_desc, false);
     TensorUtils::SetOutputTensor(ge_tensor_desc, true);
     op_desc->AddOutputDesc(ge_tensor_desc);
-    if (desc.format == FORMAT_FRACTAL_NZ || desc.format == FORMAT_FRACTAL_Z) {
-      ge_tensor_desc.SetFormat(FORMAT_ND);
-      ge_tensor_desc.SetOriginFormat(FORMAT_ND);
-    }
     build_param.outputs.emplace_back(ge_tensor_desc);
   }
 
diff --git a/src/ge/session/omg.cc b/src/ge/session/omg.cc
index 8fe31624..71dd631e 100644
--- a/src/ge/session/omg.cc
+++ b/src/ge/session/omg.cc
@@ -29,6 +29,8 @@
 #include "common/types.h"
 #include "common/util.h"
 #include "common/util/error_manager/error_manager.h"
+#include "common/helper/model_helper.h"
+#include "common/ge/ge_util.h"
 #include "framework/common/debug/ge_log.h"
 #include "framework/omg/parser/parser_inner_ctx.h"
 #include "google/protobuf/io/zero_copy_stream_impl.h"
@@ -419,10 +421,6 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const
       GELOGE(domi::FAILED, "Can not find src node (%s) in graph.", user_out_nodes[i].first.c_str());
       return domi::FAILED;
     }
-    if (out_node->GetType() == DATA) {
-      GELOGE(domi::FAILED, "out_nodes [%s] can not be set input data, please check", user_out_nodes[i].first.c_str());
-      return domi::FAILED;
-    }
     auto op_desc = out_node->GetOpDesc();
     GE_CHECK_NOTNULL(op_desc);
     if (i < output_formats.size()) {
@@ -441,24 +439,49 @@ Status SetOutputNodeInfo(ge::Graph &graph, const std::string &output_type, const
       (void)ge::AttrUtils::SetListInt(op_desc, "_output_dt_index", it_index->second);
     }
     output_nodes_info.push_back(std::make_pair(out_node, user_out_nodes[i].second));
-    output_nodes_name.push_back(out_node->GetName());
+    output_nodes_name.push_back(out_node->GetName() + ":" + std::to_string(user_out_nodes[i].second));
   }
   // default output node (leaf)
   if (user_out_nodes.empty()) {
     for (ge::NodePtr node : compute_graph->GetDirectNode()) {
       if (!node->GetInDataNodes().empty() && node->GetOutDataNodes().empty()) {
-        Status ret = GetOutputLeaf(node, output_nodes_info, output_nodes_name);
+        Status ret = GetOutputLeaf(node, output_nodes_info);
         GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "find leaf fail.");
       }
     }
   }
+  GetOutputNodesNameAndIndex(output_nodes_info, output_nodes_name);
   compute_graph->SetGraphOutNodesInfo(output_nodes_info);
   domi::GetContext().net_out_nodes = output_nodes_name;
   return domi::SUCCESS;
 }
 
-Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
-                     std::vector<std::string> &output_nodes_name) {
+void GetOutputNodesNameAndIndex(std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info,
+                                std::vector<std::string> &output_nodes_name) {
+  output_nodes_name.clear();
+  if (domi::GetContext().out_top_names.empty()) {
+    // tf process, no top name.
+    for (const auto output_node_info : output_nodes_info) {
+      std::string node_name = output_node_info.first->GetName();
+      int32_t index = output_node_info.second;
+      output_nodes_name.push_back(node_name + ":" + std::to_string(index));
+    }
+    return;
+  }
+  // caffe process, need add top name after node_name:index
+  for (size_t i = 0; i < output_nodes_info.size(); ++i) {
+    std::string node_name = output_nodes_info[i].first->GetName();
+    int32_t index = output_nodes_info[i].second;
+    if (i < domi::GetContext().out_top_names.size()) {
+      output_nodes_name.push_back(node_name + ":" + std::to_string(index) + ":" + domi::GetContext().out_top_names[i]);
+    } else {
+      GELOGW("Get top name of node [%s] fail.", node_name.c_str());
+      output_nodes_name.push_back(node_name + ":" + std::to_string(index));
+    }
+  }
+}
+
+Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>> &output_nodes_info) {
   ge::OpDescPtr tmpDescPtr = node->GetOpDesc();
   if (tmpDescPtr == nullptr) {
     GELOGE(domi::FAILED, "Get outnode op desc fail.");
@@ -468,7 +491,6 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>>
   if (node->GetType() != NETOUTPUT) {
     for (size_t index = 0; index < size; ++index) {
       output_nodes_info.push_back(std::make_pair(node, index));
-      output_nodes_name.push_back(node->GetName());
     }
   } else {
     const auto in_anchors = node->GetAllInDataAnchors();
@@ -480,7 +502,6 @@ Status GetOutputLeaf(NodePtr node, std::vector<std::pair<ge::NodePtr, int32_t>>
       }
       auto out_node = out_anchor->GetOwnerNode();
       output_nodes_info.push_back(std::make_pair(out_node, out_anchor->GetIdx()));
-      output_nodes_name.push_back(out_node->GetName());
     }
   }
   return SUCCESS;
@@ -612,9 +633,16 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri
   Params::Instance()->SetTarget(target);
 
   // Create an empty computegraph
-  ComputeGraphPtr compute_graph = nullptr;
-  GE_MAKE_SHARED(compute_graph = std::make_shared<ComputeGraph>(kGraphDefaultName + "_" + CurrentTimeInStr()),
-                 return FAILED);
+  std::string om_name;
+  ParseAtcParms(atc_params, "output", om_name);
+  ModelHelper model_helper;
+  string graph_name = "";
+  Status name_ret = model_helper.GetBaseNameFromFileName(om_name, graph_name);
+  if (name_ret != SUCCESS) {
+    graph_name = kGraphDefaultName + "_" + CurrentTimeInStr();
+  }
+  ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name);
+  GE_CHECK_NOTNULL(compute_graph);
   graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph);
 
   // initialize omgContext
@@ -664,8 +692,6 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri
     GELOGI("The pre-checking report has been saved to %s.", check_report.c_str());
   }
 
-  // Prevent data residue in multiple calls
-  PreChecker::Instance().Clear();
   GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC model parse ret fail.");
 
   std::string input_fp16_nodes;
@@ -693,12 +719,19 @@ FMK_FUNC_HOST_VISIBILITY Status ParseGraph(ge::Graph &graph, const std::map<stri
   graph = GraphUtils::CreateGraphFromComputeGraph(compute_graph);
   auto weights_parser = WeightsParserFactory::Instance()->CreateWeightsParser(type);
   ret = weights_parser->Parse(weights_file, graph);
-  GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail.");
 
   // IN ONLY_PRE_CHECK mode, generate pre inspection report only.
-  if (run_mode == ONLY_PRE_CHECK) {
+  if (PreChecker::Instance().HasError() || run_mode == ONLY_PRE_CHECK) {
+    std::string check_report;
+    ParseAtcParms(atc_params, "check_report", check_report);
+    GE_RETURN_WITH_LOG_IF_ERROR(PreChecker::Instance().Save(check_report), "Generate pre-checking report failed.");
+    GEEVENT("The pre-checking report has been saved to %s.", check_report.c_str());
     return SUCCESS;
   }
+  // Prevent data residue in multiple calls
+  PreChecker::Instance().Clear();
+
+  GE_CHK_BOOL_RET_STATUS(ret == SUCCESS, ret, "ATC weights parse ret fail.");
 
   GELOGI("ATC parser success.");
 
diff --git a/src/ge/single_op/single_op_manager.cc b/src/ge/single_op/single_op_manager.cc
index 79f3f044..990ca9cc 100644
--- a/src/ge/single_op/single_op_manager.cc
+++ b/src/ge/single_op/single_op_manager.cc
@@ -41,17 +41,18 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOpManager::GetOpFr
   uintptr_t resource_id;
   // runtime uses NULL to denote a default stream for each device
   if (stream == nullptr) {
-    // use device id as resource key instead
-    int32_t dev_id = 0;
-    auto rt_err = rtGetDevice(&dev_id);
+    // get current context
+    rtContext_t rt_cur_ctx = nullptr;
+    auto rt_err = rtCtxGetCurrent(&rt_cur_ctx);
     if (rt_err != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "Get current device id failed. ret = %d", static_cast<int>(rt_err));
+      GELOGE(RT_FAILED, "get current context failed, runtime result is %d", static_cast<int>(rt_err));
       return RT_FAILED;
     }
-
-    GELOGI("GetOpFromModel with default stream. device id = %d", dev_id);
-    resource_id = static_cast<uintptr_t>(dev_id);
+    // use current context as resource key instead
+    GELOGI("use context as resource key instead when default stream");
+    resource_id = reinterpret_cast<uintptr_t>(rt_cur_ctx);
   } else {
+    GELOGI("use stream as resource key instead when create stream");
     resource_id = reinterpret_cast<uintptr_t>(stream);
   }
 
diff --git a/src/ge/stub/Makefile b/src/ge/stub/Makefile
new file mode 100644
index 00000000..a0b35b42
--- /dev/null
+++ b/src/ge/stub/Makefile
@@ -0,0 +1,6 @@
+inc_path := $(shell pwd)/inc/external/
+out_path := $(shell pwd)/out/atc/lib64/stub/
+stub_path := $(shell pwd)/framework/domi/stub/
+
+mkdir_stub := $(shell mkdir -p $(out_path))
+local_stub := $(shell $(HI_PYTHON) $(stub_path)/gen_stubapi.py $(inc_path) $(out_path))
diff --git a/src/ge/stub/README b/src/ge/stub/README
new file mode 100644
index 00000000..ca98ce85
--- /dev/null
+++ b/src/ge/stub/README
@@ -0,0 +1,4 @@
+###################################################################################
+the directory (stub) saves the stub file
+gen_stubapi.py is using for retrieving API and generating stub functions
+###################################################################################
diff --git a/src/ge/stub/gen_stubapi.py b/src/ge/stub/gen_stubapi.py
new file mode 100644
index 00000000..6185c479
--- /dev/null
+++ b/src/ge/stub/gen_stubapi.py
@@ -0,0 +1,573 @@
+import os
+import re
+import sys
+import logging
+
+logging.basicConfig(stream=sys.stdout, format='[%(asctime)s] [%(lineno)s] %(levelname)s: %(message)s',
+                    level=logging.INFO)
+
+"""
+    this attr is used for symbol table visible
+"""
+GE_ATTR = 'GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY'
+
+"""
+    generate stub func body by return type
+"""
+RETURN_STATEMENTS = {
+    'graphStatus': '    return GRAPH_SUCCESS;',
+    'Status': '    return SUCCESS;',
+    'Graph': '    return Graph();',
+    'Graph&': '    return *this;',
+    'Format': '    return Format();',
+    'Format&': '    return *this;',
+    'Shape': '    return Shape();',
+    'Shape&': '    return *this;',
+    'TensorDesc': '    return TensorDesc();',
+    'TensorDesc&': '    return *this;',
+    'Tensor': '    return Tensor();',
+    'Tensor&': '    return *this;',
+    'Operator': '    return Operator();',
+    'Operator&': '    return *this;',
+    'Ptr': '    return nullptr;',
+    'std::string': '    return "";',
+    'std::string&': '    return "";',
+    'string': ' return "";',
+    'int': '    return 0;',
+    'DataType': '    return DT_FLOAT;',
+    'InferenceContextPtr': '    return nullptr;',
+    'SubgraphBuilder': '    return nullptr;',
+    'OperatorImplPtr': '    return nullptr;',
+    'OutHandler': '    return nullptr;',
+    'std::vector<std::string>': '    return {};',
+    'std::vector<int64_t>': '    return {};',
+    'std::map': '    return {};',
+    'uint32_t': '    return 0;',
+    'int64_t': '    return 0;',
+    'uint64_t': '    return 0;',
+    'size_t': '    return 0;',
+    'float': '    return 0.0f;',
+    'bool': '    return false;',
+}
+
+"""
+    max code len per line in hua_wei software programming specifications
+"""
+max_code_len_per_line = 100
+
+"""
+    white_list_for_debug, include_dir_key_words is to
+    determines which header files to generate cc files from
+    when DEBUG on
+"""
+white_list_for_debug = ["operator.h", "tensor.h",
+                        "graph.h", "operator_factory.h",
+                        "ge_ir_build.h"]
+include_dir_key_words = ["ge", "graph"]
+DEBUG = True
+
+
+def need_generate_func(func_line):
+    """
+    :param func_line:
+    :return:
+    """
+    if func_line.strip().endswith("default") or func_line.strip().endswith("delete") \
+            or func_line.strip().startswith("typedef") or func_line.strip().startswith("using"):
+        return False
+    return True
+
+
+def file_endswith_white_list_suffix(file):
+    """
+    :param file:
+    :return:
+    """
+    if DEBUG:
+        for suffix in white_list_for_debug:
+            if file.endswith(suffix):
+                return True
+        return False
+    else:
+        return True
+
+
+"""
+    belows are patterns used for analyse .h file
+"""
+# pattern function
+pattern_func = re.compile(r"""(^[\s]*)          #leading with space,we will find and delete after
+([a-zA-Z~_]            # void int likely
+.*
+[)]                     #we find )
+(?!.*{)                 # we do not want the case int abc() const { return 1;}
+.*)
+(;.*)                   #we want to find ; and after for we will replace these later
+\n$
+""", re.VERBOSE | re.MULTILINE | re.DOTALL)
+
+# pattern comment
+pattern_comment = re.compile(r'^\s*//')
+pattern_comment_2_start = re.compile(r'^\s*/[*]')
+pattern_comment_2_end = re.compile(r'[*]/\s*$')
+# pattern define
+pattern_define = re.compile(r'^\s*#define')
+pattern_define_return = re.compile(r'\\\s*$')
+# blank line
+pattern_blank_line = re.compile(r'^\s*$')
+# virtual,explicit,friend,static
+pattern_keyword = re.compile(r'(virtual\s+|explicit\s+|friend\s+|static\s+)')
+# lead space
+pattern_leading_space = re.compile(r'(^[\s]*)[a-zA-Z~_]')
+# functions will have patterns such as func ( or func(
+# but operator is an exception; the class name is preceded by an operator, and the above mode does not exist
+# format like :"operator = ()"
+pattern_func_name = re.compile(r'([a-zA-Z0-9~_\-]+\s*|operator?.*)[(]')
+# template
+pattern_template = re.compile(r'^\s*template')
+pattern_template_end = re.compile(r'>\s*$')
+# namespace
+pattern_namespace = re.compile(r'namespace.*{')
+# class : which can handle classA a and {not on the same line, but if found ';' after class,then don't deal with
+pattern_class = re.compile(r'^[\s]*(class|struct)\s+(%s\s+)?([a-zA-Z0-9_\-]+<?)(?!.*;)' % GE_ATTR)
+# {}
+pattern_start = re.compile('{')
+pattern_end = re.compile('}')
+
+line_index = 0
+
+
+class H2CC(object):
+    def __init__(self, input_file, output_file, shared_includes_content):
+        """
+        :param input_file:
+        :param output_file:
+        :param shared_includes_content:
+        """
+        self.input_file = input_file
+        self.output_file = output_file
+        self.shared_includes_content = shared_includes_content
+        self.line_index = 0
+        self.input_fd = open(self.input_file, 'r')
+        self.input_content = self.input_fd.readlines()
+        self.output_fd = open(self.output_file, 'w')
+
+        # The state may be normal_now(in the middle of {}),class_now,namespace_now
+        self.stack = []
+        self.stack_class = []
+        self.stack_template = []
+        # record funcs generated by h2cc func
+        self.func_list_exist = []
+
+    def __del__(self):
+        self.input_fd.close()
+        self.output_fd.close()
+        del self.stack
+        del self.stack_class
+        del self.stack_template
+        del self.func_list_exist
+
+    def just_skip(self):
+        # skip blank line or comment
+        if pattern_blank_line.search(self.input_content[self.line_index]) or pattern_comment.search(
+                self.input_content[self.line_index]):  # /n or comment using //
+            self.line_index += 1
+        if pattern_comment_2_start.search(self.input_content[self.line_index]):  # comment using /*
+            while not pattern_comment_2_end.search(self.input_content[self.line_index]):  # */
+                self.line_index += 1
+            self.line_index += 1
+        # skip define
+        if pattern_define.search(self.input_content[self.line_index]):
+            while pattern_blank_line.search(self.input_content[self.line_index]) or pattern_define_return.search(
+                    self.input_content[self.line_index]):
+                self.line_index += 1
+            self.line_index += 1
+
+    def write_inc_content(self):
+        for shared_include_content in self.shared_includes_content:
+            self.output_fd.write(shared_include_content)
+
+    def h2cc(self):
+        """
+        :return:
+        """
+        logging.info("start generate cc_file[%s] from h_file[%s]", self.output_file, self.input_file)
+        global pattern_comment
+        global pattern_comment_2_start
+        global pattern_comment_2_end
+        global pattern_blank_line
+        global pattern_func
+        global pattern_keyword
+        global pattern_leading_space
+        global pattern_func_name
+        global pattern_template
+        global pattern_template_end
+        global pattern_namespace
+        global pattern_class
+        global pattern_start
+        global pattern_end
+        global line_index
+        # write inc content
+        self.write_inc_content()
+        # core processing cycle, process the input .h file by line
+        while self.line_index < len(self.input_content):
+            # handle comment and blank line
+            self.just_skip()
+
+            # match namespace
+            self.handle_namespace()
+
+            # match template
+            template_string = self.handle_template()
+            # match class
+            line = self.input_content[self.line_index]
+            match_class = pattern_class.search(line)
+            match_start = pattern_start.search(line)
+            handle_class_result = self.handle_class(template_string, line, match_start, match_class)
+            if handle_class_result == "continue":
+                continue
+
+            # match "}"
+            handle_stack_result = self.handle_stack(match_start)
+            if handle_stack_result == "continue":
+                continue
+            # handle func
+            handle_func1_result, line, start_i = self.handle_func1(line)
+            if handle_func1_result == "continue":
+                continue
+
+            # here means func is found
+            # delete key word
+            line = pattern_keyword.sub('', line)
+            logging.info("line[%s]", line)
+
+            # Class member function
+            # if friend we will not add class name
+            friend_match = re.search('friend ', line)
+            if len(self.stack_class) > 0 and not friend_match:
+                line, func_name = self.handle_class_member_func(line, template_string)
+            # Normal functions
+            else:
+                line, func_name = self.handle_normal_func(line, template_string)
+
+            need_generate = need_generate_func(line)
+            # func body
+            line += self.implement_function(line)
+            # comment
+            line = self.gen_comment(start_i) + line
+            # write to out file
+            self.write_func_content(line, func_name, need_generate)
+            # next loop
+            self.line_index += 1
+
+        logging.info('Added %s functions', len(self.func_list_exist))
+        logging.info('Successfully converted,please see ' + self.output_file)
+
+    def handle_func1(self, line):
+        """
+        :param line:
+        :return:
+        """
+        find1 = re.search('[(]', line)
+        if not find1:
+            self.line_index += 1
+            return "continue", line, None
+        find2 = re.search('[)]', line)
+        start_i = self.line_index
+        space_match = pattern_leading_space.search(line)
+        # deal with
+        # int abc(int a,
+        #        int b)
+        if find1 and (not find2):
+            self.line_index += 1
+            line2 = self.input_content[self.line_index]
+            if space_match:
+                line2 = re.sub('^' + space_match.group(1), '', line2)
+            line += line2
+            while self.line_index < len(self.input_content) and (not re.search('[)]', line2)):
+                self.line_index += 1
+                line2 = self.input_content[self.line_index]
+                line2 = re.sub('^' + space_match.group(1), '', line2)
+                line += line2
+
+        match_start = pattern_start.search(self.input_content[self.line_index])
+        match_end = pattern_end.search(self.input_content[self.line_index])
+        if match_start:  # like  ) {  or ) {}    int the last line
+            if not match_end:
+                self.stack.append('normal_now')
+            ii = start_i
+            while ii <= self.line_index:
+                ii += 1
+            self.line_index += 1
+            return "continue", line, start_i
+        logging.info("line[%s]", line)
+        # '  int abc();'->'int abc()'
+        (line, match) = pattern_func.subn(r'\2\n', line)
+        logging.info("line[%s]", line)
+        # deal with case:
+        # 'int \n abc(int a, int b)'
+        if re.search(r'^\s*(inline)?\s*[a-zA-Z0-9_]+\s*$', self.input_content[start_i - 1]):
+            line = self.input_content[start_i - 1] + line
+        line = line.lstrip()
+        if not match:
+            self.line_index += 1
+            return "continue", line, start_i
+        return "pass", line, start_i
+
+    def handle_stack(self, match_start):
+        """
+        :param match_start:
+        :return:
+        """
+        line = self.input_content[self.line_index]
+        match_end = pattern_end.search(line)
+        if match_start:
+            self.stack.append('normal_now')
+        if match_end:
+            top_status = self.stack.pop()
+            if top_status == 'namespace_now':
+                self.output_fd.write(line + '\n')
+            elif top_status == 'class_now':
+                self.stack_class.pop()
+                self.stack_template.pop()
+        if match_start or match_end:
+            self.line_index += 1
+            return "continue"
+
+        if len(self.stack) > 0 and self.stack[-1] == 'normal_now':
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_class(self, template_string, line, match_start, match_class):
+        """
+        :param template_string:
+        :param line:
+        :param match_start:
+        :param match_class:
+        :return:
+        """
+        if match_class:  # we face a class
+            self.stack_template.append(template_string)
+            self.stack.append('class_now')
+            class_name = match_class.group(3)
+
+            # class template specializations: class A<u,Node<u> >
+            if '<' in class_name:
+                k = line.index('<')
+                fit = 1
+                for ii in range(k + 1, len(line)):
+                    if line[ii] == '<':
+                        fit += 1
+                    if line[ii] == '>':
+                        fit -= 1
+                    if fit == 0:
+                        break
+                class_name += line[k + 1:ii + 1]
+            logging.info('class_name[%s]', class_name)
+            self.stack_class.append(class_name)
+            while not match_start:
+                self.line_index += 1
+                line = self.input_content[self.line_index]
+                match_start = pattern_start.search(line)
+            self.line_index += 1
+            return "continue"
+        return "pass"
+
+    def handle_template(self):
+        line = self.input_content[self.line_index]
+        match_template = pattern_template.search(line)
+        template_string = ''
+        if match_template:
+            match_template_end = pattern_template_end.search(line)
+            template_string = line
+            while not match_template_end:
+                self.line_index += 1
+                line = self.input_content[self.line_index]
+                template_string += line
+                match_template_end = pattern_template_end.search(line)
+            self.line_index += 1
+        return template_string
+
+    def handle_namespace(self):
+        line = self.input_content[self.line_index]
+        match_namespace = pattern_namespace.search(line)
+        if match_namespace:  # we face namespace
+            self.output_fd.write(line + '\n')
+            self.stack.append('namespace_now')
+            self.line_index += 1
+
+    def handle_normal_func(self, line, template_string):
+        template_line = ''
+        self.stack_template.append(template_string)
+        if self.stack_template[-1] != '':
+            template_line = re.sub(r'\s*template', 'template', self.stack_template[-1])
+            # change '< class T = a, class U = A(3)>' to '<class T, class U>'
+            template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
+            template_line = re.sub(r'\s*=.*,', ',', template_line)
+            template_line = re.sub(r'\s*=.*', '', template_line)
+        line = re.sub(r'\s*=.*,', ',', line)
+        line = re.sub(r'\s*=.*\)', ')', line)
+        line = template_line + line
+        self.stack_template.pop()
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def handle_class_member_func(self, line, template_string):
+        template_line = ''
+        x = ''
+        if template_string != '':
+            template_string = re.sub(r'\s*template', 'template', template_string)
+            template_string = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_string)
+            template_string = re.sub(r'\s*=.*,', ',', template_string)
+            template_string = re.sub(r'\s*=.*', '', template_string)
+        if self.stack_template[-1] != '':
+            if not (re.search(r'<\s*>', stack_template[-1])):
+                template_line = re.sub(r'^\s*template', 'template', stack_template[-1])
+                if not (re.search(r'<.*>', self.stack_class[-1])):
+                    # for x we get like template<class T, typename U> -> <T,U>
+                    x = re.sub(r'template\s*<', '<', template_line)  # remove template -> <class T, typename U>
+                    x = re.sub(r'\n', '', x)
+                    x = re.sub(r'\s*=.*,', ',', x)
+                    x = re.sub(r'\s*=.*\>', '>', x)
+                    x = x.rstrip()  # remove \n
+                    x = re.sub(r'(class|typename)\s+|(<class>|<typename>\s*class)', '',
+                               x)  # remove class,typename ->  <T, U>
+                    x = re.sub(r'<\s+', '<', x)
+                    x = re.sub(r'\s+>', '>', x)
+                    x = re.sub(r'\s+,', ',', x)
+                    x = re.sub(r',\s+', ', ', x)
+        line = re.sub(r'\s*=\s+0', '', line)
+        line = re.sub(r'\s*=\s+.*,', ',', line)
+        line = re.sub(r'\s*=\s+.*\)', ')', line)
+        logging.info("x[%s]\nline[%s]", x, line)
+        # if the function is long, void ABC::foo()
+        # breaks into two lines void ABC::\n foo()
+        temp_line = pattern_func_name.sub(self.stack_class[-1] + x + '::' + r'\1(', line, count=1)
+        if len(temp_line) > max_code_len_per_line:
+            line = pattern_func_name.sub(self.stack_class[-1] + x + '::\n' + r'\1(', line, count=1)
+        else:
+            line = temp_line
+        logging.info("line[%s]", line)
+        # add template as the above if there is one
+        template_line = re.sub(r'\s*=.*>(\s*)$', r'>\1', template_line)
+        template_line = re.sub(r'\s*=.*,', ',', template_line)
+        template_line = re.sub(r'\s*=.*', '', template_line)
+        line = template_line + template_string + line
+        func_name = re.search(r'^.*\)', line, re.MULTILINE | re.DOTALL).group()
+        logging.info("line[%s]", line)
+        logging.info("func_name[%s]", func_name)
+        return line, func_name
+
+    def write_func_content(self, content, func_name, need_generate):
+        if not (func_name in self.func_list_exist) and need_generate:
+            self.output_fd.write(content)
+            self.func_list_exist.append(func_name)
+            logging.info('add func:[%s]', func_name)
+
+    def gen_comment(self, start_i):
+        comment_line = ''
+        # Function comments are on top of function declarations, copy them over
+        k = start_i - 1  # one line before this func start
+        if pattern_template.search(self.input_content[k]):
+            k -= 1
+        if pattern_comment_2_end.search(self.input_content[k]):
+            comment_line = self.input_content[k].lstrip()
+            while not pattern_comment_2_start.search(self.input_content[k]):
+                k -= 1
+                comment_line = self.input_content[k].lstrip() + comment_line
+        else:
+            for j in range(k, 0, -1):
+                c_line = self.input_content[j]
+                if pattern_comment.search(c_line):
+                    c_line = re.sub(r'\s*//', '//', c_line)
+                    comment_line = c_line + comment_line
+                else:
+                    break
+        return comment_line
+
+    @staticmethod
+    def implement_function(func):
+        function_def = ''
+        function_def += '{\n'
+
+        all_items = func.split()
+        start = 0
+        return_type = all_items[start]
+        if return_type == "const":
+            start += 1
+            return_type = all_items[start]
+        if return_type.startswith(('std::map', 'std::set', 'std::vector')):
+            return_type = "std::map"
+        if return_type.endswith('*') or (len(all_items) > start + 1 and all_items[start + 1].startswith('*')):
+            return_type = "Ptr"
+        if len(all_items) > start + 1 and all_items[start + 1].startswith('&'):
+            return_type += "&"
+        if RETURN_STATEMENTS.__contains__(return_type):
+            function_def += RETURN_STATEMENTS[return_type]
+        else:
+            logging.warning("Unhandled return type[%s]", return_type)
+
+        function_def += '\n'
+        function_def += '}\n'
+        function_def += '\n'
+        return function_def
+
+
+def collect_header_files(path):
+    """
+    :param path:
+    :return:
+    """
+    header_files = []
+    shared_includes_content = []
+    for root, dirs, files in os.walk(path):
+        files.sort()
+        for file in files:
+            if file.find("git") >= 0:
+                continue
+            if not file.endswith('.h'):
+                continue
+            file_path = os.path.join(root, file)
+            file_path = file_path.replace('\\', '/')
+            header_files.append(file_path)
+            include_str = '#include "{}"\n'.format(file_path[path.rindex('/') + 1:])
+            shared_includes_content.append(include_str)
+    return header_files, shared_includes_content
+
+
+def generate_stub_file(inc_dir, out_cc_dir):
+    """
+    :param inc_dir:
+    :param out_cc_dir:
+    :return:
+    """
+    target_header_files, shared_includes_content = collect_header_files(inc_dir)
+    for header_file in target_header_files:
+        if not file_endswith_white_list_suffix(header_file):
+            continue
+        cc_file = re.sub('.h*$', '.cc', header_file)
+        h_2_cc = H2CC(header_file, out_cc_dir + cc_file[cc_file.rindex('/') + 1:], shared_includes_content)
+        h_2_cc.h2cc()
+
+
+def gen_code(inc_dir, out_cc_dir):
+    """
+    :param inc_dir:
+    :param out_cc_dir:
+    :return:
+    """
+    if not inc_dir.endswith('/'):
+        inc_dir += '/'
+    if not out_cc_dir.endswith('/'):
+        out_cc_dir += '/'
+    for include_dir_key_word in include_dir_key_words:
+        generate_stub_file(inc_dir + include_dir_key_word, out_cc_dir)
+
+
+if __name__ == '__main__':
+    inc_dir = sys.argv[1]
+    out_cc_dir = sys.argv[2]
+    gen_code(inc_dir, out_cc_dir)
diff --git a/src/proto/fusion_model.proto b/src/proto/fusion_model.proto
index 2ff6b77a..6220963c 100644
--- a/src/proto/fusion_model.proto
+++ b/src/proto/fusion_model.proto
@@ -17,9 +17,10 @@
 syntax = "proto3";
 
 import "om.proto";
+
 package domi;
 
 message FusionModelDef {
     string version = 1;
     repeated OpDef fusion_op = 2;
-}
+}
\ No newline at end of file
diff --git a/tests/st/resnet50/common.cc b/tests/st/resnet50/common.cc
old mode 100755
new mode 100644
diff --git a/tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc b/tests/ut/ge/graph/passes/flow_ctrl_pass_unittest.cc
old mode 100755
new mode 100644
diff --git a/tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc b/tests/ut/ge/graph/passes/folding_kernel/expanddims_kernel_unittest.cc
old mode 100755
new mode 100644
diff --git a/tests/ut/ge/graph/passes/merge_pass_unittest.cc b/tests/ut/ge/graph/passes/merge_pass_unittest.cc
old mode 100755
new mode 100644
diff --git a/tests/ut/ge/graph/passes/net_output_pass_unittest.cc b/tests/ut/ge/graph/passes/net_output_pass_unittest.cc
old mode 100755
new mode 100644
diff --git a/tests/ut/ge/graph/passes/snapshot_pass_unittest.cc b/tests/ut/ge/graph/passes/snapshot_pass_unittest.cc
old mode 100755
new mode 100644
diff --git a/tests/ut/ge/single_op/single_op_manager_unittest.cc b/tests/ut/ge/single_op/single_op_manager_unittest.cc
old mode 100755
new mode 100644
diff --git a/tests/ut/ge/single_op/single_op_model_unittest.cc b/tests/ut/ge/single_op/single_op_model_unittest.cc
old mode 100755
new mode 100644
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index 097eccc5..04e1cea3 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -1029,9 +1029,9 @@ REG_OP(BesselI1e)
 * y: A Tensor of type UnaryDataType.
 
 * @attention Constraints:
-* @li "base" is supposed to be greater than 0. Retaining the default \n
+* @li "base" is supposed to be greater than 0. Retaining the default
 * value "-1" sets "base" to "e".
-* @li If the input value of operator Log is within the range (0, 0.01] or \n
+* @li If the input value of operator Log is within the range (0, 0.01] or
 * [0.95, 1.05], the output accuracy is subject to change.
 
 * @par Third-party framework compatibility
@@ -1047,11 +1047,11 @@ REG_OP(Log)
     .OP_END_FACTORY_REG(Log)
 
 /**
-* @brief Returns x1 * x2 element-wise.\n
+* @brief Returns x1 * x2 element-wise.
 * y = x1 * x2
 
 * @par Inputs:
-* @li x1: A Tensor. Must be one of the following types: float16, float32,\n
+* @li x1: A Tensor. Must be one of the following types: float16, float32,
 * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128.
 * @li x2: A Tensor. Must be one of the following types: float16, float32,
 * float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128.
@@ -1079,7 +1079,7 @@ REG_OP(Mul)
     .OP_END_FACTORY_REG(Mul)
 
 /**
-* @brief Computes the gradient of the square root of "x" with regard to its\n
+* @brief Computes the gradient of the square root of "x" with regard to its
 * input. grad = dy * 0.5/y, where y = sqrt(x), and "dy" is the corresponding
 * input gradient.
 
@@ -3022,6 +3022,7 @@ REG_OP(CosineEmbeddingLoss)
 *@brief Kullback-Leibler divergence.
 
 *@par Inputs:
+* Two inputs, including:
 *@li x: Tensor of arbitrary shape.
 *@li target: Tensor of the same shape and dtype as x.
 
diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h
index 9b3694f1..f5ddaf5e 100644
--- a/third_party/fwkacllib/inc/ops/image_ops.h
+++ b/third_party/fwkacllib/inc/ops/image_ops.h
@@ -934,7 +934,6 @@ REG_OP(EncodeJpeg)
 
 /**
 *@brief PNG-encode an image.
-
 *@par Inputs:
 *Input image must be unit8 or uint16 type. Inputs include: \n
 *image: is a 3-D uint8 or uint16 Tensor of shape [height, width, channels] \n
@@ -1224,6 +1223,16 @@ REG_OP(CombinedNonMaxSuppression)
     .ATTR(clip_boxes, Bool, true)
     .OP_END_FACTORY_REG(CombinedNonMaxSuppression)
 
+REG_OP(SpatialTransformerD)
+    .INPUT(x, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OPTIONAL_INPUT(theta, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT,DT_FLOAT16}))
+    .ATTR(output_size, ListInt, {-1, -1})
+    .ATTR(default_theta, ListFloat, {})
+    .ATTR(align_corners, Bool, false)
+    .ATTR(use_default_theta, ListBool, {})
+    .OP_END_FACTORY_REG(SpatialTransformerD)
+
 }  // namespace ge
 
 #endif  // GE_OP_MAGE_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
index 625b0f85..29cf0df3 100644
--- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h
@@ -93,31 +93,49 @@ REG_OP(MatMulV2)
 
 *@par Inputs:
 *Five inputs, including:
-*@li a: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. Has format [FRACTAL_NZ].
-*@li b: A matrix Tensor. 4D. Must be one of the following types:\n float16, int8. When type is int8, has format [FRACTAL_Z], \n otherwise has format [FRACTAL_NZ].
-*@li c: A matrix Tensor. 2D or higher. Must be one of the following types: \n float16, int32, float32. When type is int32, has format [ND], \n otherwise has format [FRACTAL_NZ].
-*@li alpha: A 1D Tensor. The shape of alpha is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND].
-*@li beta: A 1D Tensor. The shape of beta is [1].\n Must be one of the following types: float16, int32, float32. Has format [ND].
+*@li a: A matrix Tensor. Must be one of the following types: float16, int8.
+* Has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ).
+*@li b: A matrix Tensor. Must be one of the following types: float16, int8.
+* Has format [ND, FRACTAL_NZ, FRACTAL_Z]. 2D(ND) or 4D(FRACTAL_NZ, FRACTAL_Z).
+*@li c: A matrix Tensor. Must be one of the following types: float16, int32,
+* float32. has format [ND, FRACTAL_NZ]. 2D(ND) or 4D(FRACTAL_NZ).
+*@li alpha: A 1D Tensor. The shape of alpha is [1].Must be one of the following
+* types: float16, int32, float32. Has format [ND].
+*@li beta: A 1D Tensor. The shape of beta is [1]. Must be one of the following
+* types: float16, int32, float32. Has format [ND].
+* The format of a, b, c has restriction:\n
+* When type of a is int8 and type of c is int32, the format of a, b, c should
+* all be ND, or a is FRACTAL_NZ and b is FRACTAL_Z and c is ND.\n
+* When type of a is int8 and type of c is float32, the format of a, b, c should
+* all be ND or a is FRACTAL_NZ and b is FRACTAL_Z and c is FRACTAL_NZ.\n
+* When type of a is float16 and type of c is float16, the format of a, b, c
+* should all be ND or FRACTAL_NZ.\n
+* When type of a is float16 and type of c is float32, the format of a, b, c
+* should all be ND or FRACTAL_NZ.
 
 *@par Attributes:
 *Two attributes, including:
-*@li transpose_a: Optional. A bool.\n If True, changes the shape of "a" from [M, K] to [K, M].\n Reserved parameters, not used for now.
-*@li transpose_b: Optional. A bool.\n If True, changes the shape of "b" from [M, K] to [K, M].\n Reserved parameters, not used for now.
+*@li transpose_a: Optional. A bool. If True, changes the shape of "a" from
+* [M, K] to [K, M].
+*@li transpose_b: Optional. A bool. If True, changes the shape of "b" from
+* [K, N] to [N, K].
 
 *@par Outputs:
-*@out: The result matrix Tensor. 4D. Must be one of the following types:\n float16, float32, int32. Has format [FRACTAL_NZ].
+*y: The result matrix Tensor. Must be one of the following types: float16,
+* float32, int32. Has format [ND, FRACTAL_NZ], the format should be equal to a.
+* 2D(ND) or 4D(FRACTAL_NZ).
 */
 
-REG_OP(Gemm)
+REG_OP(GEMM)
     .INPUT(a, TensorType({DT_FLOAT16, DT_INT8}))
     .INPUT(b, TensorType({DT_FLOAT16, DT_INT8}))
     .INPUT(c, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
     .INPUT(alpha, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
     .INPUT(beta, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
-    .OUTPUT(out, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT32}))
     .ATTR(transpose_a, Bool, false)
     .ATTR(transpose_b, Bool, false)
-    .OP_END_FACTORY_REG(Gemm)
+    .OP_END_FACTORY_REG(GEMM)
 
 /**
 *@brief Multiplies matrix "a" by matrix "b", producing "a * b".
diff --git a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
index b89287e9..e8eb4769 100644
--- a/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_batch_norm_ops.h
@@ -361,14 +361,14 @@ REG_OP(BatchNormGradExt2)
 *@par Inputs:
 *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
 *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
-*@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
-*@li momentum: An optional string, input x's Scale factor
+*@li variance: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the variance used for inference.
+*@li momentum:  A Tensor of type float32 or float16, represents the mean and the variance's scale factor
 *@li scale: An optional tensor of type float16 or float32, no use
 *@li offset: An optional tensor of type float16 or float32, no use
 *@par Attributes:
 *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: mean inference mode , only can be "True".
-*@li mode: An optional input, not use
+*@li mode: An optional attr, not use
 *@par Outputs:\n
 *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x"
 */
@@ -391,7 +391,7 @@ REG_OP(BNInference)
 
 *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
 *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
-*@li momentum: An optional float, input x's Scale factor
+*@li momentum: A Tensor of type float32 or float16, the mean and the variance's Scale factor
 *@par Attributes:
 *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: mean inference mode , only can be "True".
@@ -420,13 +420,13 @@ REG_OP(BnHost)
 *@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
 *@li mean: A Tensor of type float32 or float16. Must be 1D if input "x"  Specifies the mean used for inference.
 *@li variance: A Tensor of type float32 or float16 . Must be 1D if input "x"  Specifies the variance used for inference.
-*@li momentum: An optional float, input x's Scale factor
 *@li scale: An optional tensor of type float16 or float32, no use
 *@li offset: An optional tensor of type float16 or float32, no use
 *@par Attributes:
+*@li momentum: An optional float32 num, represents the mean and the variance's scale factor
 *@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.00001".
 *@li use_global_stats: mean inference mode , only can be "True".
-*@li mode: An optional inpout, not use
+*@li mode: An optional attr, not use
 *@par Outputs:\n
 *@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x"
 */
diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
index f904f191..dfb23cb3 100644
--- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h
@@ -62,7 +62,7 @@ namespace ge {
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
 * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 *
-* stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n
+* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512.
 
 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter.
@@ -119,7 +119,7 @@ REG_OP(DepthwiseConv2DBackpropFilter)
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
 * Limited by Tiling and L1 / L0 buffer memory: 512 * ceil(Wo, 16) + (480 *
-* stride_h + 32 * filter_h) * ceil(Wi, 16) �?l1_size and Hf*Wf �?l0b_size/512.\n
+* stride_h + 32 * filter_h) * ceil(Wi, 16) <= l1_size and Hf*Wf <= l0b_size/512.
 
 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator DepthwiseConv2DBackpropFilter.
@@ -178,7 +178,7 @@ REG_OP(DepthwiseConv2DBackpropFilterD)
 * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
-* Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf *
+* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf *
 * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n
 
 * @par Third-party framework compatibility
@@ -235,7 +235,7 @@ REG_OP(DepthwiseConv2DBackpropInput)
 * Output backprop is 4D with shape [N, C, Ho, Wo] or [N, Ho, Wo, C], but the
 * data is 5D with shape [N, C1, Ho, Wo, C0],
 * where C is the same as that of the feature map and C0 is 16.\n
-* Limited by Tiling: max_h_in_l1 �?C0, where max_h_in_l1 = (l1_size - Hf *
+* Limited by Tiling: max_h_in_l1 >= C0, where max_h_in_l1 = (l1_size - Hf *
 * Wf * C0 * C0 * 2) / (2 * Wo *C0).\n
 
 * @par Third-party framework compatibility
@@ -459,45 +459,44 @@ REG_OP(Conv2DBackpropInputD)
 *@brief Computes the Deconvolution with respect to the input.
 *@par Inputs:
  * Three inputs:
- * @li x: A Tensor. Must have the same type as "filter". 4D with shape
- * [batch, out_height, out_width, out_channels]
- * or [batch, out_channels, out_height, out_width]. Gradients with respect
+ * @li x: A Tensor of type float16 or int8.  4D with shape
+ * [batch, out_channels, out_height, out_width]. Gradients with respect
  * to the output of the convolution.
- * @li filter: A Tensor of type float16.
- * 4D with shape [filter_height, filter_width, in_channels, out_channels],
- * or [out_channels, filter_height, filter_width, in_channels],
- * or [out_channels, in_channel, filter_height, filter_width].
+ * @li filter: A Tensor. Must have the same type as "x".
+ * 4D with shape [out_channels, in_channel, filter_height, filter_width].\n
  * Two optional inputs:
- * @li bias: An optional tensor of type float16
- * @li offset_w: An optional 1D tensor for quantized deconvolution. Reserved.\n
+ * @li bias: An optional tensor. Must have the same type as "y".
+ * @li offset_w: An optional 1D tensor for quantized deconvolution.
+ * Type is int8. Reserved.\n
 *@par Attributes:
  * Six attributes:
  * @li strides: A tuple or list of 2 integers. The stride of the sliding window
  * for H/W dimension.
  * @li pads: A tuple or list of 4 integers. The [top, bottom, left, right]
- * padding on the feature map
+ * padding on the feature map.
  * @li dilations: A tuple or list of 4 integers. The dilation factor for each
  * dimension of input. Must be [1, 1, 1, 1].
- * @li groups: Number of blocked connections from input channels to \n
- output channels.
- * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC".\n
+ * @li groups: Number of blocked connections from input channels to
+ output channels. Defaults to "1".
+ * @li data_format: An optional string from: "NCHW". Defaults to "NCHW". \n
   Specify the data format of the input and output data.
- * @li offset_x: An optional integer for quantized deconvolution.
+ * @li offset_x: An optional integer for quantized deconvolution. Defaults to "0".
 *@par Outputs:
- * y: A Tensor. Has the same type as "filter". 4D tensor with shape
- * [batch, height, width, channels] or [batch, channels, height, width].
+ * y: A Tensor. 4D tensor with shape [batch, channels, height, width].
+ * When type of x is float16, the type of y must be float16.
+ * When type of x is int8, the type of y must be int32.
 */
 REG_OP(Deconvolution)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
-    .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_INT8}))
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_INT8}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_INT32}))
     .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
-    .ATTR(strides, ListInt, {1, 1, 1, 1})
-    .ATTR(pads, ListInt, {0, 0, 0, 0})
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_INT32}))
+    .REQUIRED_ATTR(strides, ListInt)
+    .REQUIRED_ATTR(pads, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
     .ATTR(groups, Int, 1)
-    .ATTR(data_format, String, "NHWC")
+    .ATTR(data_format, String, "NCHW")
     .ATTR(offset_x, Int, 0)
     .OP_END_FACTORY_REG(Deconvolution)
 /**
@@ -554,7 +553,7 @@ REG_OP(Conv2DBackpropFilter)
  * @li groups: Number of blocked connections from input channels to output channels.
  * @li data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC". Specify the data format of the input and output data.
 *@par Outputs:
- * y: A Tensor. Has the same type as x
+ * y: A Tensor. Type is float32
 *@par Third-party framework compatibility
  * Compatible with Tensorflow's conv2d_backprop_filter
 */
@@ -586,8 +585,6 @@ REG_OP(Conv2DBackpropFilterD)
                |---------|---------|---------|----------|--------
                | float32 | float32 | float32 | _        | float32
                |---------|---------|---------|----------|--------
-               | float64 | float64 | float64 | _        | float64
-               |---------|---------|---------|----------|--------
                | int8    | int8    | int32   | int8     | int32
     -----------|---------|---------|---------|----------|--------
      Format    | NCHW    | NCHW    | ND      | ND       | NCHW
@@ -607,7 +604,7 @@ REG_OP(Conv2DBackpropFilterD)
 * for dilated convolution. Has the same dimension order and value as "strides".
 * @li groups: Number of blocked connections from input channels to output
 * channels. Input channels and output channels must both be divisible by
-* "groups". Must be set to 1.
+* "groups".
 * @li offset_x: An optional integer for quantized convolution.
 * @li data_format: An optional string from: "NHWC", "NCHW". Specifying the
 * data format of the input and output images. Reserved.
@@ -642,7 +639,7 @@ REG_OP(Conv2DBackpropFilterD)
 * @verbatim
      Output           | Restrictions
     ------------------|----------------------------------------------
-     W dimension == 1 | HxW(input) == HxW(filter) == 1x1,2x2...11x11.
+     W dimension == 1 | HxW(input) == HxW(filter)
      H dimension == 1 |
     ------------------|----------------------------------------------
      W dimension == 1 | Not supported
@@ -659,11 +656,11 @@ REG_OP(Conv2DBackpropFilterD)
 *@li Compatible with the Caffe operator 2D "Convolution".
 */
 REG_OP(Conv2D)
-    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
-    .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT8}))
-    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
+    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
+    .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT8}))
+    .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .OPTIONAL_INPUT(offset_w, TensorType({DT_INT8}))
-    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_DOUBLE, DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
     .REQUIRED_ATTR(strides, ListInt)
     .REQUIRED_ATTR(pads, ListInt)
     .ATTR(dilations, ListInt, {1, 1, 1, 1})
diff --git a/third_party/fwkacllib/inc/ops/nn_detect_ops.h b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
index 7d6007d9..5dca8a9d 100644
--- a/third_party/fwkacllib/inc/ops/nn_detect_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_detect_ops.h
@@ -186,7 +186,7 @@ REG_OP(ROIAlignGrad)
 * Three inputs, including: \n
 *@li features: A 5HD Tensor of type float32 or float16.
 *@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs, the value "5" indicates the indexes of images where the ROIs are located, 
-* "x0", "x1", "y0", and "y1".
+* "x0", "y0", "x1", and "y1".
 *@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved.
 
 *@par Attributes:
diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
index f167dbee..5eb11445 100644
--- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h
@@ -219,7 +219,7 @@ REG_OP(MaxPool3D)
 * @attention Constraints:
 * @li Computing gradients of global pooling is not supported, which means
 * "ksize < x1".
-* @li "ksiez" is in the range [1, 255]. "strides" is in the range [1, 63]
+* @li "ksize" is in the range [1, 255]. "strides" is in the range [1, 63]
 
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator MaxPoolGrad.
@@ -239,10 +239,9 @@ REG_OP(MaxPoolGrad)
 * @brief Computes second-order gradients of the maxpooling function.
 
 * @par Inputs:
-* @li x1: Original forward input tensor. Supported type:float, double, int32,
- * uint8, int16, int8, int64, uint16, half, uint32, uint64.
-* @li x2: Has the same type and format as input "x1".
-* @li grad:Has the same type and format as input "x1".
+* @li x1: Original forward input tensor of type RealNumberType
+* @li x2: Original forward output tensor of type RealNumberType
+* @li grad: Gradient tensor of type RealNumberType
 
 * @par Attributes:
 * @li ksize: A required list or tuple,
@@ -258,9 +257,12 @@ REG_OP(MaxPoolGrad)
 * @li "x1" and "grads" must have the same shape.
 * @li "x2" and "y" must have the same shape. Otherwise, an error is reported.
 * @li "x1", "x2", "grads", and "y" must be 5D tensors.
+* @li ksize[H] and ksize[W] is in the range [1, 255].
+* @li strides[H] and strides[W] is in the range [1, 63].
+* @li Other dimensions of ksize and strides is 1.
 
 * @par Outputs:
-* @li y: Has the same type and format as input "x1".
+* @li y: Result tensor of type RealNumberType
 
 * @par Third-party framework compatibility
 * @li Compatible with the TensorFlow operator MaxPoolGradGrad.
@@ -399,18 +401,15 @@ REG_OP(MaxPoolGradWithArgmax)
 * @brief Computes second-order gradients of the maxpooling function.
 
 * @par Inputs:
-* @li x: Original forward input tensor. Supported type: float, double, int32,
- * uint8, int16, int8, int64, uint16, half, uint32, uint64.
-* @li grad: Gradient tensor. Supported type: float, double, int32,
- * uint8, int16, int8, int64, uint16, half, uint32, uint64.
-* @li argmax: An tensor of type int32 or int64.
+* @li x: Original forward input tensor of type RealNumberType
+* @li grad: Gradient tensor of type RealNumberType
+* @li argmax: An tensor of type IndexNumberType
 * @par Attributes:
 * @li ksize: A required list, specifying the size of the sliding window.
 * @li strides: A required list, specifying the stride of the sliding window.
 * @li padding: A required string, window sliding mode. Either SAME or VALID.
 * @par Outputs:
-* @li y:Result tensor. Supported type: float, double, int32,
- * uint8, int16, int8, int64, uint16, half, uint32, uint64
+* @li y:Result tensor of type RealNumberType
 
 * @attention Constraints:
 * @li Only the cloud platform is supported.
diff --git a/third_party/fwkacllib/inc/ops/nn_training_ops.h b/third_party/fwkacllib/inc/ops/nn_training_ops.h
index 17233386..1c9aa516 100644
--- a/third_party/fwkacllib/inc/ops/nn_training_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_training_ops.h
@@ -41,7 +41,7 @@ namespace ge {
 *@li beta1: A scalar. Has the same type as "var".
 *@li beta2: A scalar. Has the same type as "var".
 *@li epsilon: A scalar. Has the same type as "var".
-*@li grad: A tensor for the gradient. Has the same type as "var". 
+*@li grad: A tensor for the gradient. Has the same type as "var".
 *
 *@par Attributes:
 * use_locking: An optional bool. Defaults to "False".
@@ -465,7 +465,7 @@ REG_OP(ApplyKerasMomentumD)
 
 
 /**
-*@brief Updates '*var' according to the Adam algorithm..
+*@brief Updates '*var' according to the Adam algorithm.
 *   lr_t := {learning_rate} * sqrt{1 - beta_2^t} / (1 - beta_1^t)
 *   m_t := beta_1 * m_{t-1} + (1 - beta_1) * g
 *   v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g
@@ -866,7 +866,7 @@ REG_OP(ApplyCenteredRMSProp)
     .OUTPUT(var, TensorType::NumberType())
     .ATTR(use_locking, Bool, false)
     .OP_END_FACTORY_REG(ApplyCenteredRMSProp)
-	
+
 /**
 *@brief Updates "var" according to the centered RMSProp algorithm.
 *  The centered RMSProp algorithm uses an estimate of the centered second moment
@@ -1262,7 +1262,7 @@ REG_OP(DataFormatDimMap)
     .OP_END_FACTORY_REG(DataFormatDimMap)
 
 /**
-* @brief Implements stochastic gradient descent (optionally with momentum).\n
+* @brief Implements stochastic gradient descent (optionally with momentum).
 * Nesterov momentum is based on the formula from
 * On the importance of initialization and momentum in deep learning.\n
 
@@ -1508,7 +1508,7 @@ REG_OP(ApplyProximalAdagradD)
 *@par Attributes:
 *use_locking: An optional bool. Defaults to "False".\n
 *     If "True", updating of the var and accum tensors will be protected by a lock; \n
-*     If "False", the behavior is undefined, but may exhibit less contention. 
+*     If "False", the behavior is undefined, but may exhibit less contention.
 
 *@par Outputs:
 *var: A mutable Tensor. Has the same type as "var".
@@ -2172,13 +2172,13 @@ REG_OP(SparseApplyFtrl)
 * Should be a Variable Tensor.
 * @li grad: A Tensor of the same type as "var", for the gradient.
 * @li indices: A vector of indices into the first dimension of var and accum.
+
+* @par Attributes:
 * @li lr: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
 * @li l1: A Tensor of the same type as "var", for L1 regulariation. Must be a scalar.
 * @li l2: A Tensor of the same type as "var", for L2 regulariation. Must be a scalar.
 * @li lr_power: A Tensor of the same type as "var", for the scaling factor. Must be a scalar.
-
-* @par Attributes:
-* use_locking: An optional bool. Defaults to "False".
+* @li use_locking: An optional bool. Defaults to "False".
 * If "True", updating of the "var" and "accum" tensors will be
 * protected by a lock; otherwise the behavior is undefined,
 * but may exhibit less contention.
@@ -2314,6 +2314,7 @@ REG_OP(SparseApplyFtrlV2D)
 *    var <- var - mom\n
 *
 * @par Inputs:
+* Nine inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in\n
 * TensorType::NumberType(). Should be from a Variable().
 * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
@@ -2367,6 +2368,7 @@ REG_OP(SparseApplyRMSProp)
 *     var <- var - mom
 *
 * @par Inputs:
+* Six inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in
 * TensorType::NumberType(). Should be from a Variable().
 * @li ms: A mutable tensor. Must have the same type as "var". Should be from a
@@ -2418,6 +2420,7 @@ REG_OP(SparseApplyRMSPropD)
 *    accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n
 *
 * @par Inputs:
+* Eight inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in\n
 * TensorType::NumberType(). Should be from a Variable().
 * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
@@ -2468,6 +2471,7 @@ REG_OP(SparseApplyAdadelta)
 *    accum_update <- rho() * accum_update + (1 - rho()) * update.square()\n
 *
 * @par Inputs:
+* Seven inputs, including:
 * @li var: A mutable tensor. Must be one of the data types defined in
 * TensorType::NumberType(). Should be from a Variable().
 * @li accum: A mutable tensor. Must have the same type as "var". Should be from a
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index d38faf49..1405fdb7 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -203,11 +203,11 @@ REG_OP(Sigmoid)
 * @brief Computes z = (y - y*y)*dy.
 
 * @par Inputs:
-* @li y: the input is tensor , dtype is UnaryDataType.
-* @li dy the input is tensor , dtype is UnaryDataType.
+* @li y: The input is Tensor, dtype is UnaryDataType.
+* @li dy: The input is Tensor, dtype is UnaryDataType.
 
 * @par Outputs:
-* z: the shape of output, dtype is UnaryDataType.
+* z: The shape of output, dtype is UnaryDataType.
 */
 REG_OP(SigmoidGrad)
     .INPUT(y, TensorType(UnaryDataType))
diff --git a/third_party/fwkacllib/inc/ops/quantize_ops.h b/third_party/fwkacllib/inc/ops/quantize_ops.h
index 4a4bd606..4bf0e5bf 100644
--- a/third_party/fwkacllib/inc/ops/quantize_ops.h
+++ b/third_party/fwkacllib/inc/ops/quantize_ops.h
@@ -21,17 +21,17 @@
 namespace ge {
 
 /**
-* @brief Dequantizes the input tensor into a float tensor.\n
-* [input_min_range, input_max_range] are scalar floats that specify the range
-* for "output_data". \n
+* @brief Dequantizes the input tensor into a float tensor.
+* [min_range, max_range] are float32 tensors that specify the range
+* for "y". \n
 * The "mode" attribute controls exactly which calculations are used to convert\n
 * the float values to their quantized equivalents.
 * @par Inputs:
-* @li input_data: A Tensor. Must be one of the following types: int8, uint8,
+* @li x: A Tensor. Must be one of the following types: int8, uint8,
 * int32.
-* @li input_min_range: A Tensor of type float32.
+* @li min_range: A Tensor of type float32.
 * Specifies the minimum scalar value possibly produced for the input.
-* @li input_max_range: A Tensor of type float32.
+* @li max_range: A Tensor of type float32.
 * Specifies the maximum scalar value possibly produced for the input.
 
 * @par Attributes:
@@ -39,11 +39,11 @@ namespace ge {
 * Defaults to "MIN_COMBINED".
 
 * @par Outputs:
-* output_data: A dictionary of type float32.
+* y: A dictionary of type float32.
 
 * @attention Constraints:
-* @li "input_min_range" and "input_max_range" have the same shapes.
-* @li "input_data" and "output_data" have the same shapes.
+* @li "min_range" and "max_range" have the same shapes.
+* @li "x" and "y" have the same shapes.
 
 * @par Third-party framework compatibility
 * Compatible with the TensorFlow operator Dequantize.
diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h
index 95bcd039..aafcece0 100644
--- a/third_party/fwkacllib/inc/ops/selection_ops.h
+++ b/third_party/fwkacllib/inc/ops/selection_ops.h
@@ -149,7 +149,7 @@ REG_OP(TileD)
 * @li indices: A Tensor of type IndexNumberType.
 
 * @par Outputs:
-* output: A Tensor of type BasicType.
+* y: A Tensor of type BasicType.
 * @see GatherNd()
 
 * @attention Constraints:
@@ -767,6 +767,7 @@ REG_OP(SliceD)
 * dimension.
 
 * @par Inputs:
+* Two inputs, including:
 * @li x: A 1D or higher tensor of type float16, with the last dimension at
 * least "k".
 * Specifies the data to sort.
@@ -789,7 +790,7 @@ REG_OP(SliceD)
 * @li indices: A Tensor of type int32, specifying the indices of sorted data.
 
 * @attention Constraints:
-* @li k =< 4096
+* @li k =< 5120
 * @li Size of the last dimension =< 65500
 * @li sorted = true
 * @li Don't support to get score on the platform of Ascend310
@@ -813,6 +814,7 @@ REG_OP(TopKD)
 * dimension.
 
 * @par Inputs:
+* Two inputs, including:
 * @li x: A 1D or higher tensor of type BasicType, with the last dimension
 * at least "k".
 * @li k: A 0D Tensor of type int32.\n
@@ -902,8 +904,8 @@ REG_OP(ScatterNdD)
 * @li x2: A 1D Tensor of type int32. A batch_size tensor of class ids.
 
 * @par Attributes:
-* @li k: A required int32, specifying the number of top elements to look at for
-* computing precision.
+* @li k: A required IndexNumberType, specifying the number of top elements to
+* look at for computing precision.
 
 * @par Outputs:
 * y: A Tensor of type bool.
@@ -1000,6 +1002,7 @@ REG_OP(StridedSliceAssign)
 * "strides", etc. work exactly as in "StridedSlice".
 
 * @par Inputs:
+* Two inputs, including:
 * @li var: A mutable ND Tensor of type BasicType.
 * @li input_value: A mutable ND "Tensor" of type BasicType.
 
@@ -1335,7 +1338,7 @@ REG_OP(InplaceSubD)
     .OP_END_FACTORY_REG(InplaceSubD)
 
 /**
-* @brief Applies sparse addition to input "x" using individual values or slices\n
+* @brief Applies sparse addition to input "x" using individual values or slices
 * from "updates" according to "indices". The updates are non-aliasing: "x" is\n
 * only modified in-place if no other operations will use it. Otherwise, a copy\n
 * of "x" is made. This operation has a gradient with respect to both "x" and
@@ -1372,7 +1375,7 @@ REG_OP(ScatterNonAliasingAdd)
 * @li x: A Tensor of type RealNumberType.
 * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix
 * of "x.shape".
-* @li k: A Tensor.
+* @li num_segments: A Tensor of type IndexNumberType.
 
 * @par Outputs:
 * y: A Tensor of type RealNumberType.
@@ -1419,13 +1422,13 @@ REG_OP(UnsortedSegmentMinD)
 
 * @par Inputs:
 * Three inputs, including:
-* @li x: A Tensor of type RealNumberType.
+* @li x: A Tensor of type NumberType.
 * @li segment_ids: A 1D Tensor of type IndexNumberType, whose shape is a prefix
 * of "x.shape".
-* @li k: A Tensor.
+* @li num_segments: A Tensor of type IndexNumberType.
 
 * @par Outputs:
-* y: A Tensor of type RealNumberType.
+* y: A Tensor of type NumberType.
 
 * @see UnsortedSegmentSum(), UnsortedSegmentMin(),
 
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index a8258eb9..69951da9 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -20,19 +20,38 @@
 #include "graph/operator_reg.h"
 
 namespace ge {
+/**
+*@brief Convert tensor format from HWCN to C1HWNCoC0.
+
+*@par Inputs:
+*x: A Tensor. Must be 4D Tensor of type float16, float32, int32, uint16, with format HWCN.
+
+*@par Outputs:
+*y: A 6D Tensor. Has the same type as "x", with format C1HWNCoC0.
+*/
 REG_OP(DepthwiseWeight4DTo6D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
     .OP_END_FACTORY_REG(DepthwiseWeight4DTo6D)
 
+/**
+*@brief Convert tensor format from C1HWNCoC0 to HWCN.
+
+*@par Inputs:
+*x: A Tensor. Must be 6D Tensor of type float16, float32, int32, uint16, with format C1HWNCoC0.
+
+*@par Attributes:
+*channel_size: An optional int, specifying the channel size of 4D Tensor with format HWCN.
+
+*@par Outputs:
+*y: A 4D Tensor. Has the same type as "x", with format HWCN.
+*/
 REG_OP(DepthwiseWeight6DTo4D)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_UINT16}))
     .ATTR(channel_size, Int, 16)
     .OP_END_FACTORY_REG(DepthwiseWeight6DTo4D)
 
-
-
 /**
 *@brief Permutes the dimensions according to perm.\n
         The returned tensor's dimension i will correspond to the input dimension perm[i].
@@ -390,20 +409,20 @@ REG_OP(SpaceToBatchD)
     .OP_END_FACTORY_REG(SpaceToBatchD)
 
 /**
-* @brief Unpacks the given dimension of a rank-R tensor "x" into rank-(R-1)
+* @brief Unpacks the given dimension of a rank-R Tensor "x" into rank-(R-1)
 * tensors.
 
 * @par Inputs:
 * x: A rank-R tensor (R > 0) of type BasicType, with format ND or NC1HWC0.
 
 * @par Attributes:
-* @li num: An optional int, specifying the number of tensors to be unpacked to.
+* @li num: A required int, specifying the number of tensors to be unpacked to.
 * Defaults to "None".
-* @li axis: A required int, specifying the axis to unpack along. The value range
+* @li axis: An optional int, specifying the axis to unpack along. The value range
 * is [-R, R).
 
 * @par Outputs:
-* y: The list of Tensor objects unpacked from "x", of type BasicType.
+* y: Dynamic output. The list of Tensor objects unpacked from "x", of type BasicType.
 
 * @attention Constraints:
 * @li If "num" is not specified, it is inferred from the shape of "x".
@@ -434,11 +453,11 @@ REG_OP(Unpack)
 * dimension of images.
 * @li strides: A required list or tuple. How far the centers of two consecutive
 * patches are in the images. Must be: [1, stride_rows, stride_cols, 1].
-* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1]. \n
-* This is the input stride, specifying how far two consecutive patch  \n
+* @li rates: A required list or tuple. Must be: [1, rate_rows, rate_cols, 1].\n
+* This is the input stride, specifying how far two consecutive patch\n
 * samples are in the input. Equivalent to extracting patches
 * with patch_sizes_eff = patch_sizes + (patch_sizes - 1) *\n
-* (rates - 1), followed by subsampling them spatially by a factor of rates. \n
+* (rates - 1), followed by subsampling them spatially by a factor of rates.\n
 * This is equivalent to rate in dilated (a.k.a. Atrous) convolutions.
 * @li padding: A required string. The type of padding algorithm to use.
 
diff --git a/third_party/fwkacllib/inc/register/op_registry.h b/third_party/fwkacllib/inc/register/op_registry.h
index 137309b2..1fcdf9de 100644
--- a/third_party/fwkacllib/inc/register/op_registry.h
+++ b/third_party/fwkacllib/inc/register/op_registry.h
@@ -59,6 +59,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry {
 
   domi::ParseParamFunc GetParseParamFunc(const std::string &op_type);
 
+  domi::ParseParamByOpFunc GetParseParamByOperatorFunc(const std::string &op_type);
+
   domi::FusionParseParamFunc GetFusionParseParamFunc(const std::string &op_type);
 
   domi::ParseSubgraphFunc GetParseSubgraphPostFunc(const std::string &op_type);
@@ -73,6 +75,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY OpRegistry {
   std::unordered_map<std::string, std::set<std::string>> op_ori_optype_map_;
   std::unordered_map<std::string, domi::ImplyType> op_run_mode_map_;
   std::unordered_map<std::string, ParseParamFunc> opParseParamsFnMap_;
+  std::unordered_map<std::string, ParseParamByOpFunc> parse_params_by_op_func_map_;
   std::unordered_map<std::string, FusionParseParamFunc> fusionOpParseParamsFnMap_;
   std::unordered_map<std::string, ParseSubgraphFunc> op_types_to_parse_subgraph_post_func_;
   std::unordered_map<std::string, std::vector<RemoveInputConfigure>> remove_input_configure_map_;
diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h
index 54621e86..ed1f13c2 100644
--- a/third_party/fwkacllib/inc/runtime/context.h
+++ b/third_party/fwkacllib/inc/runtime/context.h
@@ -98,6 +98,14 @@ RTS_API rtError_t rtCtxSynchronize(void);
  */
 RTS_API rtError_t rtCtxGetCurrent(rtContext_t *ctx);
 
+/**
+ * @ingroup rt_context
+ * @brief returns the primary context of device.
+ * @param [out] ctx   returned context
+ * @return RT_ERROR_NONE for ok
+ */
+RTS_API rtError_t rtGetPriCtxByDeviceId(int32_t device, rtContext_t *ctx);
+
 /**
  * @ingroup rt_context
  * @brief returns the device ID for the current context
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index f77df225..7f9c4630 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -277,6 +277,7 @@ extern int dlog_setlevel(int moduleId, int level, int enableEvent);
 /**
  * @ingroup slog
  * @brief CheckLogLevel: check module level enable or not
+ * users no need to call it because all dlog interface(include inner interface) has already called
  *
  * @param [in]moduleId: module id, eg: CCE
  * @param [in]logLevel: eg: DLOG_EVENT/DLOG_ERROR/DLOG_WARN/DLOG_INFO/DLOG_DEBUG
@@ -291,46 +292,76 @@ extern int CheckLogLevel(int moduleId, int logLevel);
  * @param [in]moduleId: module id, eg: CCE
  * @param [in]fmt: log content
  */
-#define dlog_error(moduleId, fmt, ...)                                          \
-  do {                                                                          \
-    DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+#define dlog_error(moduleId, fmt, ...)                                              \
+  do {                                                                              \
+    DlogErrorInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);     \
   } while (0)
 
 /**
  * @ingroup slog
  * @brief dlog_warn: print warning log
+ * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time
  *
  * @param [in]moduleId: module id, eg: CCE
  * @param [in]fmt: log content
  */
-#define dlog_warn(moduleId, fmt, ...)                                          \
-  do {                                                                         \
-    DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD
+#define dlog_warn(moduleId, fmt, ...)                                               \
+  do {                                                                              \
+        DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
   } while (0)
+#else
+#define dlog_warn(moduleId, fmt, ...)                                               \
+  do {                                                                              \
+    if(CheckLogLevel(moduleId, DLOG_WARN) == 1) {                                   \
+        DlogWarnInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
+    }                                                                               \
+  } while (0)
+#endif
 
 /**
  * @ingroup slog
  * @brief dlog_info: print info log
+ * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time
  *
  * @param [in]moduleId: module id, eg: CCE
  * @param [in]fmt: log content
  */
-#define dlog_info(moduleId, fmt, ...)                                          \
-  do {                                                                         \
-    DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD
+#define dlog_info(moduleId, fmt, ...)                                               \
+  do {                                                                              \
+        DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
+  } while (0)
+#else
+#define dlog_info(moduleId, fmt, ...)                                               \
+  do {                                                                              \
+    if(CheckLogLevel(moduleId, DLOG_INFO) == 1) {                                   \
+        DlogInfoInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
+    }                                                                               \
   } while (0)
+#endif
 
 /**
  * @ingroup slog
  * @brief dlog_debug: print debug log
+ * call CheckLogLevel in advance to optimize performance, call interface with fmt input take time
  *
  * @param [in]moduleId: module id, eg: CCE
  * @param [in]fmt: log content
  */
-#define dlog_debug(moduleId, fmt, ...)                                          \
-  do {                                                                          \
-    DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD
+#define dlog_debug(moduleId, fmt, ...)                                              \
+  do {                                                                              \
+        DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
   } while (0)
+#else
+#define dlog_debug(moduleId, fmt, ...)                                              \
+  do {                                                                              \
+    if(CheckLogLevel(moduleId, DLOG_DEBUG) == 1) {                                  \
+        DlogDebugInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+    }                                                                               \
+  } while (0)
+#endif
 
 /**
  * @ingroup slog
@@ -339,9 +370,9 @@ extern int CheckLogLevel(int moduleId, int logLevel);
  * @param [in]moduleId: module id, eg: CCE
  * @param [in]fmt: log content
  */
-#define dlog_event(moduleId, fmt, ...)                                          \
-  do {                                                                          \
-    DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+#define dlog_event(moduleId, fmt, ...)                                              \
+  do {                                                                              \
+    DlogEventInner(moduleId, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);     \
   } while (0)
 
 /**
@@ -352,10 +383,19 @@ extern int CheckLogLevel(int moduleId, int logLevel);
  * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
  * @param [in]fmt: log content
  */
-#define Dlog(moduleId, level, fmt, ...)                                           \
-  do {                                                                            \
-    DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD
+#define Dlog(moduleId, level, fmt, ...)                                                 \
+  do {                                                                                  \
+        DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);   \
   } while (0)
+#else
+#define Dlog(moduleId, level, fmt, ...)                                                 \
+  do {                                                                                  \
+    if(CheckLogLevel(moduleId, level) == 1) {                                           \
+        DlogInner(moduleId, level, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);   \
+     }                                                                                  \
+  } while (0)
+#endif
 
 /**
  * @ingroup slog
@@ -366,10 +406,19 @@ extern int CheckLogLevel(int moduleId, int logLevel);
  * @param [in]level(0: debug, 1: info, 2: warning, 3: error, 5: trace, 6: oplog, 16: event)
  * @param [in]fmt: log content
  */
-#define DlogSub(moduleId, submodule, level, fmt, ...)                                            \
-  do {                                                                                           \
-    DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__); \
+#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD
+#define DlogSub(moduleId, submodule, level, fmt, ...)                                                   \
+  do {                                                                                                  \
+    DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__);        \
+  } while (0)
+#else
+#define DlogSub(moduleId, submodule, level, fmt, ...)                                                   \
+  do {                                                                                                  \
+    if(CheckLogLevel(moduleId, level) == 1) {                                                           \
+        DlogInner(moduleId, level, "[%s:%d][%s]" fmt, __FILE__, __LINE__, submodule, ##__VA_ARGS__);    \
+    }                                                                                                   \
   } while (0)
+#endif
 
 /**
  * @ingroup slog
@@ -381,11 +430,19 @@ extern int CheckLogLevel(int moduleId, int logLevel);
  * @param [in]kvNum: key-value element num in array
  * @param [in]fmt: log content
  */
-#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...)                                           \
-  do {                                                                                                     \
-    DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__); \
+#ifdef _SKIP_TOOLCHAIN_LOG_FUNC_ABCD
+#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...)                                                \
+  do {                                                                                                          \
+        DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
   } while (0)
-
+#else
+#define DlogWithKV(moduleId, level, pstKVArray, kvNum, fmt, ...)                                                \
+  do {                                                                                                          \
+    if(CheckLogLevel(moduleId, level) == 1) {                                                                   \
+        DlogWithKVInner(moduleId, level, pstKVArray, kvNum, "[%s:%d]" fmt, __FILE__, __LINE__, ##__VA_ARGS__);  \
+    }                                                                                                           \
+  } while (0)
+#endif
 
 /**
  * @ingroup slog