From 279eb010c190540a230eae69f2bcd18fece7d8dc Mon Sep 17 00:00:00 2001
From: zhou_lili <zhoulili20@huawei.com>
Date: Wed, 7 Apr 2021 09:19:26 +0800
Subject: [PATCH] ge code of fuzz build

---
 ge/CMakeLists.txt                             |   2 +
 ge/ge_inference.mk                            |   1 +
 ge/ge_runner.mk                               |   1 +
 ge/generator/ge_generator.cc                  |  74 +++++++-
 ge/graph/manager/graph_manager.cc             |  23 ++-
 ge/graph/manager/graph_manager.h              |   1 +
 .../passes/mark_node_unknown_shape_pass.cc    |  99 +++++++++++
 .../passes/mark_node_unknown_shape_pass.h     |  32 ++++
 ge/graph/passes/reshape_recovery_pass.cc      |  14 +-
 ge/hybrid/executor/hybrid_model_executor.cc   |   7 +-
 ge/hybrid/model/node_item.cc                  |   2 +-
 .../node_executor/aicore/aicore_op_task.cc    |   9 +
 .../node_executor/aicore/aicore_op_task.h     |   2 +
 ge/offline/main.cc                            |  12 +-
 ge/offline/single_op_parser.cc                |  18 +-
 ge/offline/single_op_parser.h                 |   2 +
 ge/single_op/single_op.cc                     | 147 ++++++++++++++--
 ge/single_op/single_op.h                      |   7 +-
 ge/single_op/single_op_model.cc               |  57 +++++-
 ge/single_op/single_op_model.h                |   7 +-
 ge/single_op/stream_resource.cc               |  16 ++
 ge/single_op/stream_resource.h                |   5 +
 ge/single_op/task/op_task.cc                  |  65 +++++--
 ge/single_op/task/op_task.h                   |  12 +-
 ge/single_op/task/tbe_task_builder.cc         | 101 ++++-------
 ge/single_op/task/tbe_task_builder.h          |   1 -
 inc/framework/generator/ge_generator.h        |   6 +-
 inc/framework/omg/omg_inner_types.h           |   1 +
 tests/ut/ge/CMakeLists.txt                    |   4 +
 .../ut/ge/generator/ge_generator_unittest.cc  |  29 ++--
 .../mark_node_unknown_shape_pass_unittest.cc  | 115 ++++++++++++
 .../passes/reshape_recovery_pass_unittest.cc  |  69 ++++++++
 tests/ut/ge/single_op/single_op_unittest.cc   | 163 ++++++++++++++++++
 33 files changed, 957 insertions(+), 147 deletions(-)
 create mode 100644 ge/graph/passes/mark_node_unknown_shape_pass.cc
 create mode 100644 ge/graph/passes/mark_node_unknown_shape_pass.h
 create mode 100644 tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc
 create mode 100644 tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc
 create mode 100644 tests/ut/ge/single_op/single_op_unittest.cc

diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt
index 87e89a38..d84bb89a 100755
--- a/ge/CMakeLists.txt
+++ b/ge/CMakeLists.txt
@@ -195,6 +195,7 @@ set(TRAIN_SRC_LIST
     "graph/passes/atomic_addr_clean_pass.cc"
     "graph/passes/mark_same_addr_pass.cc"
     "graph/passes/mark_graph_unknown_status_pass.cc"
+    "graph/passes/mark_node_unknown_shape_pass.cc"
     "graph/passes/mark_agnostic_pass.cc"
     "graph/partition/dynamic_shape_partition.cc"
     "graph/partition/stage_partition.cc"
@@ -509,6 +510,7 @@ set(INFER_SRC_LIST
     "graph/passes/atomic_addr_clean_pass.cc"
     "graph/passes/mark_same_addr_pass.cc"
     "graph/passes/mark_graph_unknown_status_pass.cc"
+    "graph/passes/mark_node_unknown_shape_pass.cc"
     "graph/passes/mark_agnostic_pass.cc"
     "graph/common/omg_util.cc"
     "graph/common/bcast.cc"
diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk
index f30ba22a..32fc206d 100755
--- a/ge/ge_inference.mk
+++ b/ge/ge_inference.mk
@@ -114,6 +114,7 @@ OMG_HOST_SRC_FILES := \
     graph/passes/atomic_addr_clean_pass.cc \
     graph/passes/mark_same_addr_pass.cc \
     graph/passes/mark_graph_unknown_status_pass.cc \
+    graph/passes/mark_node_unknown_shape_pass.cc \
     graph/passes/mark_agnostic_pass.cc \
     graph/common/omg_util.cc \
     graph/common/bcast.cc \
diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk
index 0efcf820..49515fe4 100644
--- a/ge/ge_runner.mk
+++ b/ge/ge_runner.mk
@@ -114,6 +114,7 @@ LIBGE_LOCAL_SRC_FILES := \
     graph/passes/atomic_addr_clean_pass.cc \
     graph/passes/mark_same_addr_pass.cc \
     graph/passes/mark_graph_unknown_status_pass.cc \
+    graph/passes/mark_node_unknown_shape_pass.cc \
     graph/passes/mark_agnostic_pass.cc \
     graph/partition/dynamic_shape_partition.cc \
     graph/partition/stage_partition.cc \
diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc
index 14882683..feff7d21 100644
--- a/ge/generator/ge_generator.cc
+++ b/ge/generator/ge_generator.cc
@@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
 const int64_t kDynamicDimValue = -2;
 const int kDefaultDeviceId = 0;
 const int kDefaultJobId = 0;
+const int32_t kFuzzBuildPattern = 1;
 
 std::map<ge::OpEngineType, std::string> engine_type_map{
     {ge::ENGINE_SYS, kEngineNameDefault},
@@ -296,13 +297,44 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso
   return SUCCESS;
 }
 
+// Drops ATTR_NAME_FUZZ_BUILD from every root-graph node, then reads the op's fuzz build result attrs.
+static Status GetFuzzBuildAttrs(const OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model,
+                                GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) {
+  GELOGD("Start get fuzz build attrs of %s.", op_desc->GetName().c_str());
+  GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
+  for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
+    GE_CHECK_NOTNULL(node);
+    GE_CHECK_NOTNULL(node->GetOpDesc());
+    GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str());
+    node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD);
+  }
+  (void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
+  if (fuzz_build_attrs.empty()) {
+    GELOGW("%s build with fuzz build pattern, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", op_desc->GetName().c_str());
+  } else {
+    GELOGD("%s has split, get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str());
+  }
+  return SUCCESS;  // an absent result attr is only warned about, never reported as a failure
+}
+
+static bool HasShapeRange(const vector<GeTensor> &inputs) {  // true once any input reports a shape range
+  for (size_t idx = 0; idx < inputs.size(); ++idx) {
+    vector<pair<int64_t, int64_t>> range_of_input;
+    (void)inputs[idx].GetTensorDesc().GetShapeRange(range_of_input);  // status ignored; only emptiness matters
+    if (!range_of_input.empty()) {
+      GELOGD("Has set shape range.");
+      return true;
+    }
+  }
+  return false;
+}
+
 class GeGenerator::Impl {
  public:
   Impl(OmgContext &omg_context) : omg_context_(omg_context) {}
   ~Impl() = default;
 
   Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models);
-
   Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model);
 
   Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff);
@@ -742,7 +774,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor>
 
 Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
                                   const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
-                                  bool is_offline) {
+                                  bool is_offline, int32_t compile_flag) {
+  GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size());
   GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
   impl_->is_offline_ = is_offline;
   if (!is_offline) {
@@ -764,6 +797,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
   OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc);
   GE_CHECK_NOTNULL(op_desc_tmp);
 
+  bool fuzz_compile_flag = false;
+  if (!HasShapeRange(inputs) && compile_flag == kFuzzBuildPattern) {
+    fuzz_compile_flag = true;
+  }
+  if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) {
+    GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str());
+    return FAILED;
+  }
+  impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag;
+
   // 1. Create ComputeGraph.
   string name = ge::CurrentTimeInStr() + "_" + model_file_name;
   Graph graph;
@@ -810,6 +853,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
     GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
     GE_CHK_STATUS_RET_NOLOG(
       impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
+  } else if (fuzz_compile_flag) {
+    GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str());
+    (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag);
+    GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs;
+    if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) {
+      GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str());
+      return FAILED;
+    }
+    if (!fuzz_build_attrs.empty()) {
+      GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs),
+                       return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed.");
+    }
+    GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
   } else {
     GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
   }
@@ -825,15 +881,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
  * @param [in] vector<GeTensor> &inputs: Operator input data description information.
  * @param [in] vector<GeTensor> &outputs: Operator output data description information.
  * @param [in] const string &model_file_name: Offline model filename.
+ * @param [in] compile_flag: op build flag from atc
  * @return SUCCESS handle successfully / others handle failed
  */
 Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
-                                       const vector<GeTensor> &outputs, const string &model_file_name) {
+                                       const vector<GeTensor> &outputs, const string &model_file_name,
+                                       int32_t compile_flag) {
   ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
   GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size());
   ModelBufferData model_buff;
   OpEngineType engine_type = ENGINE_SYS;
-  Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
+  Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag);
   GELOGI("Finish build single offline model, status: %u", status);
   return status;
 }
@@ -850,7 +908,6 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
  * @return SUCCESS handle successfully / others handle failed
  */
 
-// old process will be deleted
 Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
                                        const vector<GeTensor> &outputs, OpEngineType engine_type,
                                        ModelBufferData &model_buff) {
@@ -864,7 +921,12 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
 Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
                                        const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag,
                                        ModelBufferData &model_buff) {
-  return SUCCESS;
+  ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
+  GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size());
+  Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false,
+                                compile_flag);
+  GELOGI("Finish build single online model, status: %u", status);
+  return status;
 }
 
 Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc
index 19679a2a..d866beca 100755
--- a/ge/graph/manager/graph_manager.cc
+++ b/ge/graph/manager/graph_manager.cc
@@ -61,6 +61,7 @@
 #include "graph/passes/iterator_op_pass.h"
 #include "graph/passes/link_gen_mask_nodes_pass.h"
 #include "graph/passes/mark_graph_unknown_status_pass.h"
+#include "graph/passes/mark_node_unknown_shape_pass.h"
 #include "graph/passes/merge_pass.h"
 #include "graph/passes/merge_input_memcpy_pass.h"
 #include "graph/passes/merge_to_stream_merge_pass.h"
@@ -864,6 +865,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
   }
 
   ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize);
+  // set fuzz compile flag after origin graph optimize
+  GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed.");
   ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id);
   if (ret != SUCCESS) {
     GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());
@@ -878,7 +881,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
                                          options_.build_step == BUILD_STEP_AFTER_BUILDER ||
                                          options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB));
   if (run_after_optimize_subgraph) {
-    Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
+    ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
     if (ret != SUCCESS) {
       GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());
       return ret;
@@ -896,6 +899,22 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
   return SUCCESS;
 }
 
+Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) {  // tags each node for fuzz build
+  if (!GetLocalOmgContext().fuzz_compile_flag) {
+    return SUCCESS;  // fuzz compile is off: leave the graph untouched
+  }
+  GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag);  // logged once, not per node
+  for (const auto &node : compute_graph->GetAllNodes()) {
+    OpDescPtr op_desc = node->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+    if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) {
+      GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str());
+      return FAILED;
+    }
+  }
+  return SUCCESS;
+}
+
 Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) {
   PassManager pass_manager;
   GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass));
@@ -2487,6 +2506,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
                                                            new (std::nothrow) VariableRefDeleteOpPass))
   GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass",
                                                            new (std::nothrow) CompileNodesPass))
+  GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass(
+      "OptimizeStage2::AfterMergePasses::MarkNodeUnknownShapePass", new(std::nothrow) MarkNodeUnknownShapePass))
   GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass(
       "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass))
   GE_CHK_STATUS_RET(
diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h
index 661cf9d8..b63b138a 100644
--- a/ge/graph/manager/graph_manager.h
+++ b/ge/graph/manager/graph_manager.h
@@ -358,6 +358,7 @@ class GraphManager {
                                      ComputeGraphPtr &compute_graph,
                                      GeRootModelPtr &ge_root_model,
                                      uint64_t session_id);
+  Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph);
 
   Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph,
                                    Graph2SubGraphInfoList &sub_graph_map,
diff --git a/ge/graph/passes/mark_node_unknown_shape_pass.cc b/ge/graph/passes/mark_node_unknown_shape_pass.cc
new file mode 100644
index 00000000..c040e846
--- /dev/null
+++ b/ge/graph/passes/mark_node_unknown_shape_pass.cc
@@ -0,0 +1,99 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph/passes/mark_node_unknown_shape_pass.h"
+#include "graph/utils/node_utils.h"
+#include "graph/debug/ge_attr_define.h"
+#include "graph/common/local_context.h"
+
+namespace ge {
+namespace {
+constexpr char const *kEngineNameAiCore = "AIcoreEngine";    // kernel lib name each node is compared against
+constexpr char const *kNeedRefreshShape = "_need_generate";  // attr probed on a node's original node
+constexpr char const *kOriginalNode = "_original_node";      // ext-attr key under which the original node is read
+constexpr int32_t kDynamicState = -2;                        // sole dim of the shape written to updated nodes
+}  // namespace
+
+// Fuzz-compile-only pass: when IsAllAicoreSupportDyn() accepts the graph, node shapes are rewritten by
+// UpdateNodeShapeToUnknown(); in every other case the graph is left as it is and SUCCESS is returned.
+Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) {
+  GE_CHECK_NOTNULL(graph);
+  if (!GetLocalOmgContext().fuzz_compile_flag || !IsAllAicoreSupportDyn(graph)) {
+    return SUCCESS;
+  }
+  if (UpdateNodeShapeToUnknown(graph) != SUCCESS) {
+    GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown.");
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+// True only when the graph holds at least one AiCore node and each of them has usable fuzz build results.
+bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) {
+  bool has_aicore_node = false;
+  for (const auto &node : graph->GetAllNodes()) {
+    const auto op_desc = node->GetOpDesc();
+    if (op_desc == nullptr) {
+      continue;
+    }
+    if (op_desc->GetOpKernelLibName() != kEngineNameAiCore) {
+      GELOGD("Kernel of %s is %s.", node->GetName().c_str(), op_desc->GetOpKernelLibName().c_str());
+      continue;
+    }
+    NodePtr original_node = nullptr;
+    original_node = op_desc->TryGetExtAttr(kOriginalNode, original_node);
+    const bool usable = AttrUtils::HasAttr(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS) &&
+        (original_node == nullptr || !AttrUtils::HasAttr(original_node->GetOpDesc(), kNeedRefreshShape));
+    if (!usable) {
+      GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
+      return false;  // one unsuitable AiCore node rejects the whole graph
+    }
+    GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
+    has_aicore_node = true;
+  }
+  return has_aicore_node;
+}
+
+// Sets shapes of non-const/non-variable nodes to {kDynamicState}; inputs fed by const/variable nodes are kept.
+Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) {
+  GELOGD("Need to update node shape to dynamic when get fuzz build result.");
+  for (const auto &node : graph->GetAllNodes()) {
+    if (NodeUtils::IsConst(*node) || node->GetType() == VARIABLE) {
+      continue;
+    }
+    auto op_desc = node->GetOpDesc();
+    GE_CHECK_NOTNULL(op_desc);
+    for (size_t idx = 0; idx < op_desc->GetAllInputsSize(); ++idx) {
+      auto src = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(idx));
+      const bool keep_shape = src != nullptr && (NodeUtils::IsConst(*src) || src->GetType() == VARIABLE);
+      if (!keep_shape) {
+        GELOGD("Update input shape for %s.", node->GetName().c_str());
+        auto in_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(idx));
+        if (in_desc != nullptr) {
+          in_desc->SetShape(GeShape({kDynamicState}));
+        }
+      }
+    }
+    for (auto &out_desc : op_desc->GetAllOutputsDescPtr()) {
+      if (out_desc != nullptr) {
+        GELOGD("Update output shape for %s.", node->GetName().c_str());
+        out_desc->SetShape(GeShape({kDynamicState}));
+      }
+    }
+  }
+  return SUCCESS;
+}
+}  // namespace ge
\ No newline at end of file
diff --git a/ge/graph/passes/mark_node_unknown_shape_pass.h b/ge/graph/passes/mark_node_unknown_shape_pass.h
new file mode 100644
index 00000000..b78b7826
--- /dev/null
+++ b/ge/graph/passes/mark_node_unknown_shape_pass.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
+#define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
+#include "graph/graph.h"
+#include "inc/graph_pass.h"
+
+namespace ge {
+class MarkNodeUnknownShapePass : public GraphPass {  // graph pass that only acts when fuzz compile is enabled
+ public:
+  Status Run(ComputeGraphPtr graph) override;  // pass entry point; returns SUCCESS when nothing needs changing
+
+ private:
+  bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph);       // do all AiCore nodes carry fuzz build results?
+  Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph);  // rewrites node input/output shapes to unknown
+};
+}  // namespace ge
+#endif  // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc
index f0987ff5..84050e87 100644
--- a/ge/graph/passes/reshape_recovery_pass.cc
+++ b/ge/graph/passes/reshape_recovery_pass.cc
@@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) {
       GE_CHECK_NOTNULL(dst_node->GetOpDesc());
       auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx());
       GE_CHECK_NOTNULL(dst_tensor);
-      bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
-                                    dst_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
-                                    src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims();
+      bool is_dynamic = false;
+      const auto &src_tensor_dims = src_tensor->GetShape().GetDims();
+      const auto &dst_tensor_dims = dst_tensor->GetShape().GetDims();
+      if ((std::any_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val < 0 ; }))
+          || (std::any_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val < 0; }))) {
+        GELOGD("No need to insert reshape node between %s nad %s.", node->GetName().c_str(),
+               dst_node->GetName().c_str());
+        is_dynamic = true;
+      }
+      bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims &&
+                                    !is_dynamic;
       if (is_need_insert_reshape) {
         auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph());
         GE_CHECK_NOTNULL(reshape);
diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index 85b2e9ac..4a8a0af0 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -54,6 +54,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
                       "[%s] check input node shape by shape range failed.",
                       root_graph_item->GetName().c_str());
   }
+
   if (context_.global_step != nullptr) {
     GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
                                 sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
@@ -100,8 +101,10 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
     GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id));
   }
 
-  HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
-  RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
+  if (!model_->IsSingleOp()) {
+    HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
+    RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
+  }
 
   args.outputs.clear();
   HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");
diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc
index f14e9a21..ef43d09f 100644
--- a/ge/hybrid/model/node_item.cc
+++ b/ge/hybrid/model/node_item.cc
@@ -168,7 +168,7 @@ Status NodeItem::InitInputsAndOutputs() {
 
 Status NodeItem::ResolveDynamicState() {
   (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
-  GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
+  GELOGD("Node name is %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic);
   if (!is_dynamic) {
     GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic),
                       "[%s] Failed to get shape status.",
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
index 6f9a5a52..9bfbe47f 100644
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc
@@ -22,6 +22,7 @@
 #include "hybrid/node_executor/aicore/aicore_task_builder.h"
 #include "graph/load/model_manager/tbe_handle_store.h"
 #include "graph/types.h"
+#include "single_op/task/build_task_utils.h"
 
 using optiling::OpRunInfo;
 
@@ -31,6 +32,7 @@ namespace {
 constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
 constexpr char const *kAttrOpParamSize = "op_para_size";
 constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
+std::atomic<std::uint64_t> log_id(0);
 }  // namespace
 
 TbeHandleHolder::TbeHandleHolder(void *bin_handle)
@@ -48,6 +50,12 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
 }
 
 Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
+  log_name_ = op_desc.GetName() + "_tvmbin";
+  log_id_ = log_id++;
+  auto op_desc_ptr = MakeShared<OpDesc>(op_desc);
+  GE_CHECK_NOTNULL(op_desc_ptr);
+  auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_ptr);
+  GELOGI("[TASK_INFO] %lu/%s %s.", log_id_, log_name_.c_str(), task_info.c_str());
   GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
   GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
 
@@ -67,6 +75,7 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
       output_indices_to_skip_.push_back(i);
     }
   }
+  GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str());
   return SUCCESS;
 }
 
diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h
index f7d0854f..fe18bfd0 100755
--- a/ge/hybrid/node_executor/aicore/aicore_op_task.h
+++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h
@@ -114,6 +114,8 @@ class AiCoreOpTask {
   uint32_t tiling_key_ = 0;
   void *handle_ = nullptr;
   bool is_dynamic_ = false;
+  uint64_t log_id_ = 0;
+  std::string log_name_;
 };
 
 class AtomicAddrCleanOpTask : public AiCoreOpTask {
diff --git a/ge/offline/main.cc b/ge/offline/main.cc
index 28d16a79..54a1d8fb 100755
--- a/ge/offline/main.cc
+++ b/ge/offline/main.cc
@@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path");
 
 DEFINE_string(display_model_info, "0", "Optional; display model info");
 
+DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance. "
+                                     "normal: no need to compile, use saved .o files directly; "
+                                     "high: need to recompile, high execute performance mode.");
+
 class GFlagUtils {
  public:
   /**
@@ -330,7 +334,8 @@ class GFlagUtils {
         "Default value: $HOME/atc_data\n"
         "  --op_compiler_cache_mode   Set the operator compilation cache mode."
         "Options are disable(default), enable and force(force to refresh the cache)\n"
-        "  --display_model_info     enable for display model info; 0(default): close display, 1: open display");
+        "  --display_model_info     enable for display model info; 0(default): close display, 1: open display.\n"
+        "  --performance_mode       Set high performance mode of compile or execute.");
 
     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
     // Using gflags to analyze input parameters
@@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) {
   options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode);
   options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path);
   options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path);
+  options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode);
 }
 
 domi::Status GenerateSingleOp(const std::string& json_file_path) {
@@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {
       output_path = FLAGS_output + "/";
     }
     output_path += param.file_name;
-    ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path);
+    ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag);
     if (ret != SUCCESS) {
       DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index);
       ret = domi::FAILED;
@@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() {
   options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path));
 
   options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info));
+
+  options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode));
   // set enable scope fusion passes
   SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes);
   // print atc option map
diff --git a/ge/offline/single_op_parser.cc b/ge/offline/single_op_parser.cc
index 2fa0a043..ce9448d5 100644
--- a/ge/offline/single_op_parser.cc
+++ b/ge/offline/single_op_parser.cc
@@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format";
 constexpr char const *kFileSuffix = ".om";
 constexpr char const *kKeyDynamicInput = "dynamic_input";
 constexpr char const *kKeyDynamicOutput = "dynamic_output";
+constexpr char const *kKeyCompileFlag = "compile_flag";
 constexpr int kDumpJsonIndent = 2;
 constexpr int kShapeRangePairSize = 2;
 constexpr int kShapeRangeLow = 0;
@@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) {
 }
 
 void from_json(const Json &j, SingleOpDesc &desc) {
-  desc.op = j.at(kKeyOp).get<string>();
+  auto op = j.find(kKeyOp);
+  if (op != j.end()) {
+    desc.op = j.at(kKeyOp).get<string>();
+  }
 
   auto input_desc = j.find(kKeyInputDesc);
   if (input_desc != j.end()) {
@@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) {
   if (attr_field != j.end()) {
     desc.attrs = attr_field->get<vector<SingleOpAttr>>();
   }
+
+  auto compile_flag = j.find(kKeyCompileFlag);
+  if (compile_flag != j.end()) {
+    desc.compile_flag = compile_flag->get<int32_t>();
+  }
 }
 
 Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) {
@@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
       return ret;
     }
 
+    int32_t compile_flag = 0;
     for (const Json &single_op_json : single_op_list_json) {
       SingleOpDesc single_op_desc;
       GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str());
       single_op_desc = single_op_json;
+      GELOGD("Compile flag is %d.", single_op_desc.compile_flag);
+      if (single_op_desc.compile_flag == 1) {
+        compile_flag = single_op_desc.compile_flag;
+        continue;
+      }
       if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) {
         GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!");
         REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param.");
@@ -604,6 +619,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
       if (ret != SUCCESS) {
         return ret;
       }
+      param.compile_flag = compile_flag;
 
       op_list.emplace_back(param);
       GELOGI("Parse the index[%d] of op success", index);
diff --git a/ge/offline/single_op_parser.h b/ge/offline/single_op_parser.h
index 71aa58bb..11f5512e 100644
--- a/ge/offline/single_op_parser.h
+++ b/ge/offline/single_op_parser.h
@@ -55,6 +55,7 @@ struct SingleOpDesc {
   std::vector<SingleOpTensorDesc> input_desc;
   std::vector<SingleOpTensorDesc> output_desc;
   std::vector<SingleOpAttr> attrs;
+  int32_t compile_flag = 0;
 };
 
 struct SingleOpBuildParam {
@@ -62,6 +63,7 @@ struct SingleOpBuildParam {
   std::vector<ge::GeTensor> inputs;
   std::vector<ge::GeTensor> outputs;
   std::string file_name;
+  int32_t compile_flag = 0;
 };
 
 void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc);
diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc
index f3f0b647..c305eea9 100755
--- a/ge/single_op/single_op.cc
+++ b/ge/single_op/single_op.cc
@@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32;
 const size_t kDataMemAlignUnit = 2;
 const string kShapeTypeDynamic = "dynamic";
 const string kShapeTypeStatic = "static";
+const int64_t kHostMemType = 1;  // DataBuffer placement value that CalInputsHostMemSize treats as host memory
+const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;  // upper bound (1 MiB) on the padded host-input total
+const uint32_t kAlignBytes = 512;  // every host input size is rounded up to a multiple of this
 
 size_t GetAlignedSize(size_t size) {
   size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
@@ -65,6 +68,87 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
   profiling_manager.ReportProfilingData(model_id, task_desc_info);
   return SUCCESS;
 }
+
+// Finds the inputs that live in host memory and computes the device space each of them needs.
+// For every input whose placement equals kHostMemType, the pair (input index, length rounded up to a
+// multiple of kAlignBytes) is appended to inputs_size.
+// Returns FAILED when the padded sizes add up to more than kFuzzDeviceBufferSize; the length and overflow
+// checks return early through their macros.
+Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs,
+                            std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
+  int64_t total_size = 0;
+  for (size_t index = 0; index < inputs.size(); ++index) {
+    const auto &input_buffer = inputs[index];
+    if (input_buffer.placement != kHostMemType) {
+      continue;
+    }
+    GE_CHECK_LE(input_buffer.length, INT64_MAX);
+    int64_t input_size = input_buffer.length;
+    // input_size pad to 512
+    GE_CHK_STATUS_RET(CheckInt64AddOverflow(input_size, (kAlignBytes - 1)), "Padding size is beyond the INT64_MAX.");
+    input_size = ((input_size + kAlignBytes - 1) / kAlignBytes) * kAlignBytes;
+    inputs_size.emplace_back(index, input_size);
+    GE_CHK_STATUS_RET(CheckInt64AddOverflow(total_size, input_size), "Total size is beyond the INT64_MAX.");
+    total_size += input_size;
+    GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
+  }
+  if (total_size > kFuzzDeviceBufferSize) {
+    GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
+    return FAILED;
+  }
+  return SUCCESS;
+}
+
+// Stages the host inputs in the device buffer owned by stream_resource.
+// Each input listed in inputs_size is copied asynchronously on stream into consecutive slices of that
+// buffer, and the matching entry of update_buffers is repointed to its device copy.
+// Returns SUCCESS, FAILED when the device buffer cannot be provided, or the runtime copy error.
+Status UpdateInputsBufferAddr(StreamResource *stream_resource, rtStream_t stream,
+                              const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
+                              std::vector<DataBuffer> &update_buffers) {
+  GE_CHECK_NOTNULL(stream_resource);
+  // Allocate the device buffer once and reuse it afterwards: initializing on every call would replace the
+  // stored pointer and leak the previous allocation.
+  if (stream_resource->GetDeviceBufferAddr() == nullptr && stream_resource->Init() != SUCCESS) {
+    GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer.");
+    return FAILED;
+  }
+  auto dst_addr = reinterpret_cast<uint8_t *>(stream_resource->GetDeviceBufferAddr());
+  // Never hand a null destination to the runtime copy.
+  GE_CHECK_NOTNULL(dst_addr);
+  // copy host mem from input_buffer to device mem of dst_addr
+  for (const auto &input_size : inputs_size) {
+    size_t index = input_size.first;
+    auto size = input_size.second;
+    GELOGD("Do H2D for %zu input, dst size is %zu, src length is %lu.", index, size, update_buffers[index].length);
+    GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length,
+                                RT_MEMCPY_HOST_TO_DEVICE_EX, stream));
+    update_buffers[index].data = dst_addr;
+    dst_addr += size;
+  }
+  return SUCCESS;
+}
+
+// Fills args for the hybrid model executor: one TensorValue per input and output buffer (data pointer and
+// length) and one shared copy of every input tensor desc.
+// Returns SUCCESS, or the GE_CHECK_NOTNULL failure status when a desc copy cannot be allocated.
+Status InitHybridModelArgs(const std::vector<DataBuffer> &input_buffers,
+                           const std::vector<DataBuffer> &output_buffers,
+                           const std::vector<GeTensorDesc> &inputs_desc,
+                           hybrid::HybridModelExecutor::ExecuteArgs &args) {
+  for (const auto &input : input_buffers) {
+    args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
+  }
+  for (const auto &output : output_buffers) {
+    args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
+  }
+  for (const auto &tensor_desc : inputs_desc) {
+    auto desc = MakeShared<GeTensorDesc>(tensor_desc);
+    GE_CHECK_NOTNULL(desc);
+    args.input_desc.emplace_back(desc);
+  }
+  return SUCCESS;
+}
 }  // namespace
 
 SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream)
@@ -168,13 +237,28 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
 
 FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs,
                                                                                const std::vector<DataBuffer> &outputs) {
+  GELOGD("Start SingleOp::ExecuteAsync.");
   Status ret = ValidateArgs(inputs, outputs);
   if (ret != SUCCESS) {
     return ret;
   }
 
   GE_CHECK_NOTNULL(stream_resource_);
+  vector<pair<size_t, uint64_t>> inputs_size;
+  GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size));
   std::lock_guard<std::mutex> lk(*stream_mutex_);
+  vector<DataBuffer> update_buffers = inputs;
+  if (!inputs_size.empty()) {
+    GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource_, stream_, inputs_size, update_buffers));
+  }
+
+  if (hybrid_model_executor_ != nullptr) {
+    GELOGD("Execute multi-task single op by hybrid model executor");
+    hybrid::HybridModelExecutor::ExecuteArgs args;
+    GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, outputs, inputs_desc_, args));
+    return hybrid_model_executor_->Execute(args);
+  }
+
   auto current_mem_base = stream_resource_->GetMemoryBase();
   if (running_param_->mem_base != current_mem_base) {
     running_param_->mem_base = const_cast<uint8_t *>(current_mem_base);
@@ -185,7 +269,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
           task->GetOpdesc()->GetName().c_str());
     }
   }
-  ret = UpdateArgs(inputs, outputs);
+  ret = UpdateArgs(update_buffers, outputs);
   if (ret != SUCCESS) {
     return ret;
   }
@@ -252,33 +336,78 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc,
   return SUCCESS;
 }
 
+// Attaches the data of the host-resident inputs to the op's input tensor descs as ATTR_NAME_VALUE.
+// inputs_size   : (input index, padded size) pairs of the host inputs; only the index is used here.
+// input_desc    : descs of the inputs, indexed like input_buffers.
+// input_buffers : the original input buffers whose data is wrapped into GeTensor objects.
+// Returns SUCCESS, or an error status when the task or op desc is missing, an index is out of range, the
+// tensor data cannot be set or the attribute cannot be stored.
+Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
+                                           const vector<GeTensorDesc> &input_desc,
+                                           const std::vector<DataBuffer> &input_buffers) {
+  GE_CHECK_NOTNULL(op_task_);
+  auto op_desc = op_task_->GetOpdesc();
+  GE_CHECK_NOTNULL(op_desc);
+  GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str());
+  for (const auto &input_size : inputs_size) {
+    const size_t index = input_size.first;
+    // Report a status instead of letting vector::at() throw or operator[] read out of bounds.
+    if (index >= input_desc.size() || index >= input_buffers.size()) {
+      GELOGE(INTERNAL_ERROR, "[Check][Index]Input index %zu is out of range, desc num %zu, buffer num %zu.",
+             index, input_desc.size(), input_buffers.size());
+      return INTERNAL_ERROR;
+    }
+    const auto &ge_tensor_desc = input_desc[index];
+    const auto &input_buffer = input_buffers[index];
+    // reconstruct GeTensor by DataBuffer
+    GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc);
+    GE_CHECK_NOTNULL(ge_tensor);
+    GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
+           index, ge_tensor_desc.GetDataType(), input_buffer.data, input_buffer.length);
+    if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffer.data),
+                           static_cast<size_t>(input_buffer.length)) != SUCCESS) {
+      GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
+      return INTERNAL_ERROR;
+    }
+    auto tensor_desc = op_desc->MutableInputDesc(index);
+    GE_CHECK_NOTNULL(tensor_desc);
+    if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) {
+      GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str());
+      return FAILED;
+    }
+  }
+  return SUCCESS;
+}
+
 Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
                                      const vector<DataBuffer> &input_buffers,
                                      vector<GeTensorDesc> &output_desc,
                                      vector<DataBuffer> &output_buffers) {
+  GELOGD("Start DynamicSingleOp::ExecuteAsync.");
   GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
+  vector<pair<size_t, uint64_t>> inputs_size;
+  GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_buffers, inputs_size));
+  vector<DataBuffer> update_buffers = input_buffers;
+  std::lock_guard<std::mutex> lk(*stream_mutex_);
+  if (!inputs_size.empty()) {
+    StreamResource *stream_resource  = SingleOpManager::GetInstance().GetResource(resource_id_, stream_);
+    GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers));
+  }
+
   if (hybrid_model_executor_ != nullptr) {
     GELOGD("Execute multi-task dynamic single op by hybrid model executor");
     hybrid::HybridModelExecutor::ExecuteArgs args;
-    for (auto &input : input_buffers) {
-      args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
-    }
-    for (auto &output : output_buffers) {
-      args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
-    }
-    for (auto &tensor_desc : input_desc) {
-      auto desc = MakeShared<GeTensorDesc>(tensor_desc);
-      GE_CHECK_NOTNULL(desc);
-      args.input_desc.emplace_back(desc);
-    }
+    GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args));
 
     return hybrid_model_executor_->Execute(args);
   }
-
-  std::lock_guard<std::mutex> lk(*stream_mutex_);
   GE_CHECK_NOTNULL(op_task_);
-
-  GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
+  if (!inputs_size.empty()) {
+    GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(inputs_size, input_desc, input_buffers));
+    GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_));
+  } else {
+    GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
+  }
   GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_));
   GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
   return SUCCESS;
diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h
index b350b684..01d6dfc0 100755
--- a/ge/single_op/single_op.h
+++ b/ge/single_op/single_op.h
@@ -59,6 +59,9 @@ class SingleOp {
   std::vector<OpTask *> tasks_;
   std::vector<std::vector<uintptr_t *>> arg_table_;
   std::unique_ptr<SingleOpModelParam> running_param_;
+  std::unique_ptr<hybrid::HybridModel> hybrid_model_;
+  std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
+  std::vector<GeTensorDesc> inputs_desc_;
 };
 
 class DynamicSingleOp {
@@ -76,7 +79,8 @@ class DynamicSingleOp {
                         const std::vector<DataBuffer> &inputs,
                         std::vector<GeTensorDesc> &output_desc,
                         std::vector<DataBuffer> &outputs) const;
-
+  Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
+                            const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers);
   std::unique_ptr<OpTask> op_task_;
   std::unique_ptr<hybrid::HybridModel> hybrid_model_;
   std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
@@ -85,6 +89,7 @@ class DynamicSingleOp {
   rtStream_t stream_ = nullptr;
   size_t num_inputs_ = 0;
   size_t num_outputs_ = 0;
+  ComputeGraphPtr compute_graph_;
 };
 }  // namespace ge
 #endif  // GE_SINGLE_OP_SINGLE_OP_H_
diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc
index 3c2b7cc3..d2f8062a 100755
--- a/ge/single_op/single_op_model.cc
+++ b/ge/single_op/single_op_model.cc
@@ -43,6 +43,8 @@ using std::vector;
 namespace ge {
 namespace {
 const size_t kDataOutputNum = 1;
+const uint32_t kOutputIndexOfData = 0;
+constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
 
 Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
   auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
@@ -51,7 +53,9 @@ Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
     auto op_desc = node->GetOpDesc();
     GE_CHECK_NOTNULL(op_desc);
     const auto &depends = op_desc->GetOpInferDepends();
-    if (!depends.empty()) {
+    bool support_dynamic_shape = false;
+    (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape);
+    if (!depends.empty() && support_dynamic_shape) {
       flag = true;
       return SUCCESS;
     }
@@ -462,6 +466,39 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa
   *task = aicpucc_task.release();
   return SUCCESS;
 }
+
+// Prepares single_op for execution through the hybrid model executor.
+// Records output desc kOutputIndexOfData of every data op as the op's input descs, then builds and
+// initializes a HybridModel and a HybridModelExecutor bound to the current device and the resource's stream.
+// Returns SUCCESS, or the failing status of the first step that does not succeed.
+Status SingleOpModel::InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model,
+                                              SingleOp &single_op) {
+  GE_CHECK_NOTNULL(ge_model);
+  for (const auto &op_desc : data_ops_) {
+    GE_CHECK_NOTNULL(op_desc);
+    single_op.inputs_desc_.emplace_back(op_desc->GetOutputDesc(kOutputIndexOfData));
+    GELOGD("Init inputs desc from %s.", op_desc->GetName().c_str());
+  }
+  GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
+  auto root_model = model_helper_.GetGeRootModel();
+  GE_CHECK_NOTNULL(root_model);
+  // Validate the graph before it is dereferenced for its name.
+  auto root_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
+  GE_CHECK_NOTNULL(root_graph);
+  root_model->SetRootGraph(root_graph);
+  root_model->SetSubgraphInstanceNameToModel(root_graph->GetName(), ge_model);
+  single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
+  GE_CHECK_NOTNULL(single_op.hybrid_model_);
+  GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed.");
+  int32_t device_id = 0;
+  GE_CHK_RT_RET(rtGetDevice(&device_id));
+  single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
+                                                                                       device_id,
+                                                                                       resource.GetStream()));
+  GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
+  GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
+  return SUCCESS;
+}
 
 Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
   GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs());
@@ -469,10 +498,20 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
   single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_));
   GE_CHECK_NOTNULL(single_op.running_param_);
   GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op));
+  auto ge_model = model_helper_.GetGeModel();
+  GE_CHECK_NOTNULL(ge_model);
+  bool infer_depend_flag = false;
+  GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed.");
+  if (infer_depend_flag) {
+    // construct single_op, do single op with HybridModelExecutor
+    GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor.");
+    return InitHybridModelExecutor(resource, ge_model, single_op);
+  }
   return BuildTaskList(&resource, single_op);
 }
 
-Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
+Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def,
+                                           DynamicSingleOp &single_op) {
   auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
   const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
                                                             task_def.kernel_with_handle().context();
@@ -483,6 +522,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
     TbeOpTask *tbe_task = nullptr;
     GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
     tbe_task->SetModelArgs(model_name_, model_id_);
+    if (tbe_task->tiling_buffer_ != nullptr) {
+      GELOGD("tiling buffer is not nullptr.");
+      tbe_task->stream_resource_ = stream_resource;
+    }
     single_op.op_task_.reset(tbe_task);
   } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
     GELOGD("Building AICPU_CC task");
@@ -504,10 +547,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
   return SUCCESS;
 }
 
-Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
+Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) {
   auto ge_model = model_helper_.GetGeModel();
   GE_CHECK_NOTNULL(ge_model);
 
+  auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
+  GE_CHECK_NOTNULL(compute_graph);
+  single_op.compute_graph_ = compute_graph;
   auto tasks = ge_model->GetModelTaskDefPtr()->task();
   for (int i = 0; i < tasks.size(); ++i) {
     const TaskDef &task_def = tasks[i];
@@ -521,7 +567,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
             "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
         return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
       }
-      GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op));
+      GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op));
     } else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
       if (single_op.op_task_ != nullptr) {
         GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks.");
@@ -561,6 +607,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
   single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
   GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
   model_params_.memory_size = UINT_MAX;
+  model_params_.graph_is_dynamic = true;
 
   auto ge_model = model_helper_.GetGeModel();
   GE_CHECK_NOTNULL(ge_model);
@@ -585,6 +632,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
     GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
     return SUCCESS;
   }
-  return BuildTaskListForDynamicOp(single_op);
+  return BuildTaskListForDynamicOp(&resource, single_op);
 }
 }  // namespace ge
diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h
index b1a7d3ea..d900f09f 100755
--- a/ge/single_op/single_op_model.h
+++ b/ge/single_op/single_op_model.h
@@ -40,6 +40,7 @@ struct SingleOpModelParam {
 
   std::map<uintptr_t, int> addr_mapping_;
   int64_t core_type = 0;
+  bool graph_is_dynamic = false;
 };
 
 class SingleOpModel {
@@ -65,15 +66,17 @@ class SingleOpModel {
   void ParseOutputNode(const OpDescPtr &op_desc);
 
   Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
-  Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
+  Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op);
   Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
   Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
                            bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
   Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);
-  Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op);
+  Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def,
+                              DynamicSingleOp &single_op);
 
   static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param);
   void ParseArgTable(OpTask *task, SingleOp &op);
+  Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op);
 
   std::string model_name_;
   uint32_t model_id_ = 0;
diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc
index 5f009f63..e7049297 100755
--- a/ge/single_op/stream_resource.cc
+++ b/ge/single_op/stream_resource.cc
@@ -22,6 +22,11 @@
 #include "single_op/single_op_model.h"
 
 namespace ge {
+namespace {
+// Device buffer size (1 MiB) allocated by Init(); single_op.cc defines the same limit, keep them in sync.
+const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;
+}  // namespace
+
 StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {
 }
 
@@ -39,6 +44,27 @@ StreamResource::~StreamResource() {
       GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
     }
   }
+
+  if (device_buffer_ != nullptr) {
+    auto rt_ret = rtFree(device_buffer_);
+    GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
+  }
+}
+
+// Allocates the kFuzzDeviceBufferSize-byte device buffer returned by GetDeviceBufferAddr().
+// The call is idempotent: once a buffer exists it is kept, so repeated calls do not leak device memory.
+// Returns SUCCESS when a buffer is available, RT_FAILED when rtMalloc fails.
+Status StreamResource::Init() {
+  if (device_buffer_ != nullptr) {
+    return SUCCESS;
+  }
+  auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM);
+  if (rt_ret != RT_ERROR_NONE) {
+    GELOGE(RT_FAILED, "[Malloc][Rt] failed.");
+    device_buffer_ = nullptr;
+    return RT_FAILED;
+  }
+  return SUCCESS;
 }
 
 SingleOp *StreamResource::GetOperator(const uint64_t key) {
diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h
index 73a6231b..aecb38c8 100755
--- a/ge/single_op/stream_resource.h
+++ b/ge/single_op/stream_resource.h
@@ -40,6 +40,7 @@ class StreamResource {
   rtStream_t GetStream() const;
   void SetStream(rtStream_t stream);
 
+  Status Init();
   SingleOp *GetOperator(const uint64_t key);
   DynamicSingleOp *GetDynamicOperator(const uint64_t key);
 
@@ -49,6 +50,9 @@ class StreamResource {
   uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true);
   uint8_t *MallocWeight(const std::string &purpose, size_t size);
   const uint8_t *GetMemoryBase() const;
+  void *GetDeviceBufferAddr() const {  // buffer allocated by Init(); nullptr until then
+    return device_buffer_;
+  }
 
  private:
   uint8_t *DoMallocMemory(const std::string &purpose,
@@ -65,6 +69,7 @@ class StreamResource {
   rtStream_t stream_ = nullptr;
   std::mutex mu_;
   std::mutex stream_mu_;
+  void *device_buffer_ = nullptr;
 };
 }  // namespace ge
 
diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc
index 2a580c7e..bce52335 100755
--- a/ge/single_op/task/op_task.cc
+++ b/ge/single_op/task/op_task.cc
@@ -137,7 +137,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id
   return SUCCESS;
 }
 
-Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
+Status OpTask::UpdateRunInfo() {
   return UNSUPPORTED;
 }
 
@@ -200,14 +200,14 @@ void TbeOpTask::SetHandle(void *handle) {
 
 Status TbeOpTask::LaunchKernel(rtStream_t stream) {
   GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
-  auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
-  auto ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream);
+  auto ret = DoLaunchKernel(stream);
+
   int retry_times = 0;
   while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) {
     retry_times++;
     GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times);
     std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime));
-    ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, sm_desc, stream);
+    ret = DoLaunchKernel(stream);
   }
 
   if (ret != RT_ERROR_NONE) {
@@ -220,8 +220,7 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
   return SUCCESS;
 }
 
-Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
-  GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc));
+Status TbeOpTask::UpdateRunInfo() {
   // invoke OpParaCalculate
   GELOGD("Start to invoke OpParaCalculate.");
   optiling::OpRunInfo run_info;
@@ -235,10 +234,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
   block_dim_ = run_info.block_dim;
   tiling_data_ = run_info.tiling_data.str();
   tiling_key_ = run_info.tiling_key;
+  run_info_workspaces_ = run_info.workspaces;
   GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
          tiling_data_.size(), tiling_key_);
-
-  GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "[Allocate][Workspaces] failed.");
   return SUCCESS;
 }
 
@@ -288,14 +286,33 @@ Status TbeOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, cons
   return SUCCESS;
 }
 
-void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size) {
+Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) {
+  if (tiling_buffer != nullptr) {  // with a tiling buffer: write its address into the arg array from GetIoAddr
+    uintptr_t *arg_base = nullptr;
+    size_t arg_num = 0;
+    GetIoAddr(arg_base, arg_num);
+    GE_CHECK_NOTNULL(node);
+    GE_CHECK_NOTNULL(node->GetOpDesc());
+    uint32_t inputs_num = node->GetOpDesc()->GetInputsSize();
+    uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize();
+    uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size();
+    uint32_t tiling_index = inputs_num + outputs_num + workspace_nums;  // slot after inputs, outputs, workspaces
+    if (arg_num == 0 || arg_num < tiling_index) {  // NOTE(review): arg_num == tiling_index passes; confirm slot exists
+      GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.",
+             tiling_index, arg_num);
+      return ACL_ERROR_GE_INTERNAL_ERROR;
+    }
+    arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer);
+  }
   node_ = node;
   tiling_buffer_ = tiling_buffer;
   max_tiling_size_ = max_tiling_size;
+  return SUCCESS;  // reached after node_, tiling_buffer_ and max_tiling_size_ were stored
 }
 
 Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) {
   static const std::string kPurpose("malloc workspace memory for dynamic op.");
+  workspaces_.clear();
   if (workspace_sizes.empty()) {
     GELOGD("No need to allocate workspace.");
     return SUCCESS;
@@ -333,8 +350,10 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
                                vector<GeTensorDesc> &output_desc,
                                vector<DataBuffer> &output_buffers,
                                rtStream_t stream) {
-  GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc));
   GELOGD("[%s] Start to launch kernel", node_->GetName().c_str());
+  GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc));
+  GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo());
+  GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed.");
   std::vector<void *> args;
   for (auto &buffer : input_buffers) {
     args.emplace_back(buffer.data);
@@ -354,6 +373,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
     args.emplace_back(tiling_buffer_);
   }
 
+  GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *));
+  // node with workspace: build can not get size of workspace, need to update arg_size_ when execute
+  if (arg_size_ < (args.size() * sizeof(void *))) {
+    size_t temp_size = args.size() * sizeof(void *);
+    GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size);
+    args_.reset(new(std::nothrow) uint8_t[temp_size]());
+    GE_CHECK_NOTNULL(args_);
+    arg_size_ = temp_size;
+  }
   if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) {
     GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str());
     REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str());
@@ -361,17 +389,22 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
   }
 
   GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
+  GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel.");
+
+  return SUCCESS;
+}
+
+Status TbeOpTask::DoLaunchKernel(rtStream_t stream) {  // performs one launch attempt on stream, without retrying
+  auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);  // forwarded to both launch calls below
   if (handle_ == nullptr) {
-    GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
-    GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
+    GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_),
+                                 sm_desc, stream));
   } else {
     std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
     std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
-    GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
-                                           stream, kernel_info.c_str()));
-    GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
+    GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(),
+                                           static_cast<uint32_t>(arg_size_), sm_desc, stream, kernel_info.c_str()));
   }
-
   return SUCCESS;
 }
 
diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h
index 8c91bd5f..0c64ecb4 100644
--- a/ge/single_op/task/op_task.h
+++ b/ge/single_op/task/op_task.h
@@ -30,6 +30,7 @@
 #include "cce/aicpu_engine_struct.h"
 #include "hybrid/node_executor/aicpu/aicpu_ext_info.h"
 #include "init/gelib.h"
+#include "register/op_tiling.h"
 
 namespace ge {
 class StreamResource;
@@ -39,8 +40,7 @@ class OpTask {
   OpTask() = default;
   virtual ~OpTask() = default;
   virtual Status LaunchKernel(rtStream_t stream) = 0;
-  virtual Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
-                               const vector<GeTensorDesc> &output_desc);
+  virtual Status UpdateRunInfo();
   virtual Status UpdateArgTable(const SingleOpModelParam &param);
   void SetModelArgs(std::string model_name, uint32_t model_id);
   Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
@@ -81,22 +81,23 @@ class TbeOpTask : public OpTask {
   void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
                                const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle);
 
-  Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
-                       const vector<GeTensorDesc> &output_desc) override;
+  Status UpdateRunInfo() override;
 
   const void *GetArgs() const;
   size_t GetArgSize() const;
   const std::string &GetStubName() const;
-  void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
+  Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size);
   const std::string &GetTaskType() const override;
   void SetHandle(void *handle);
 
  private:
   friend class SingleOpModel;
+  friend class TbeTaskBuilder;
   static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor);
   Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc,
                            const vector<GeTensorDesc> &output_desc);
   Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes);
+  Status DoLaunchKernel(rtStream_t stream);
 
   const void *stub_func_ = nullptr;
   std::unique_ptr<uint8_t[]> args_;
@@ -108,6 +109,7 @@ class TbeOpTask : public OpTask {
   void *tiling_buffer_ = nullptr;
   uint32_t max_tiling_size_ = 0;
   std::string tiling_data_;
+  std::vector<int64_t> run_info_workspaces_;
   std::vector<void *> workspaces_;
   NodePtr node_;
 
diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc
index 177f42f8..c7ff13d1 100644
--- a/ge/single_op/task/tbe_task_builder.cc
+++ b/ge/single_op/task/tbe_task_builder.cc
@@ -308,92 +308,65 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param
 }
 
 Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc) {
-  size_t arg_size = kernel_def_.args_size();
-  auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
-  GE_CHECK_NOTNULL(args);
-
-  auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy failed, size = %zu, ret = %d", 
-        arg_size, static_cast<int>(rt_ret));
-    REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
-    return RT_ERROR_TO_GE_STATUS(rt_ret);
+  auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
+  bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL);  // selects kernel_def_with_handle_ below
+  size_t arg_size = 0;
+  std::unique_ptr<uint8_t[]> args = nullptr;
+  if (is_task_all_kernel) {
+    GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_ALL_KERNEL.", op_desc->GetName().c_str());
+    arg_size = kernel_def_with_handle_.args_size();
+    args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
+    GE_CHECK_NOTNULL(args);
+    GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size,
+                           RT_MEMCPY_HOST_TO_HOST));
+  } else {
+    GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_KERNEL.", op_desc->GetName().c_str());
+    arg_size = kernel_def_.args_size();
+    args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
+    GE_CHECK_NOTNULL(args);
+    GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST));
   }
 
-  const domi::KernelContext &context = kernel_def_.context();
+  const domi::KernelContext &context = is_task_all_kernel ?
+                                       kernel_def_with_handle_.context() : kernel_def_.context();
   const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
   uint16_t offset = *args_offset_tmp;
 
-  bool is_dynamic = false;
-  (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
-  if (is_dynamic) {
-    GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
-  } else {
-    // copy args
-    std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
-    void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
-    uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
-    rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
-      REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret));
-      return RT_ERROR_TO_GE_STATUS(rt_ret);
-    }
-  }
-  task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);
+  // copy the tensor addresses into the args buffer, starting `offset` bytes in (now done for dynamic ops too)
+  std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
+  void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
+  uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
+  GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST));
 
-  return SUCCESS;
-}
-
-Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param,
-                                               const OpDescPtr &op_desc) {
-  size_t arg_size = kernel_def_with_handle_.args_size();
-  auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
-  GE_CHECK_NOTNULL(args);
-
-  auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
-  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(rt_ret, "[Update][Kernel_def:args]rtMemcpy failed, size = %zu, ret = %d", 
-        arg_size, static_cast<int>(rt_ret));
-    REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
-    return rt_ret;
+  if (is_task_all_kernel) {
+    task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc,
+                                 kernel_def_with_handle_);
+  } else {
+    task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);
   }
 
-  const domi::KernelContext &context = kernel_def_with_handle_.context();
-  const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
-  uint16_t offset = *args_offset_tmp;
-
   bool is_dynamic = false;
   (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
   if (is_dynamic) {
     GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
-  } else {
-    // copy args
-    std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
-    void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
-    uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
-    rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
-    if (rt_ret != RT_ERROR_NONE) {
-      GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
-      REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret));
-      return rt_ret;
+    if (!param.graph_is_dynamic && task.tiling_buffer_ != nullptr) {
+      GELOGD("Need to update run info when graph is static with dynamic node: %s.", op_desc->GetName().c_str());
+      GE_CHK_STATUS_RET_NOLOG(task.UpdateRunInfo());  // do not copy tiling data if computing it failed
+      GE_CHK_RT_RET(rtMemcpy(task.tiling_buffer_, task.max_tiling_size_, task.tiling_data_.data(),
+                             task.tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE));
     }
   }
-  task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc,
-                               kernel_def_with_handle_);
-
   return SUCCESS;
 }
 
 Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam &param) {
   GELOGD("Build tbe task begin");
-  auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
-  auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) :
-                                                     SetKernelArgs(task, param, op_desc_);
+  auto ret = SetKernelArgs(task, param, op_desc_);
   if (ret != SUCCESS) {
     return ret;
   }
 
+  auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
   ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) :
                                                 RegisterKernel(task, param);
   task.SetHandle(handle_);
@@ -437,7 +410,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) {
     GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
   }
 
-  task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size));
+  task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size));
   return SUCCESS;
 }
 }  // namespace ge
diff --git a/ge/single_op/task/tbe_task_builder.h b/ge/single_op/task/tbe_task_builder.h
index 8af9a68d..a202cbf1 100755
--- a/ge/single_op/task/tbe_task_builder.h
+++ b/ge/single_op/task/tbe_task_builder.h
@@ -97,7 +97,6 @@ class TbeTaskBuilder {
  private:
   Status InitTilingInfo(TbeOpTask &task);
   Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
-  Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc);
   Status GetSmDesc(void **sm_desc, const SingleOpModelParam &param) const;
 
   Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam &param);
diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h
index db3b2039..24f969dd 100644
--- a/inc/framework/generator/ge_generator.h
+++ b/inc/framework/generator/ge_generator.h
@@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator {
   /// @param [in] inputs: input tensors.
   /// @param [in] outputs: output tensors.
   /// @param [in] model_file_name: name of model file.
+  /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1
   /// @return SUCCESS or FAILED
   ///
   Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs,
-                            const std::vector<GeTensor> &outputs, const std::string &model_file_name);
+                            const std::vector<GeTensor> &outputs, const std::string &model_file_name,
+                            int32_t compile_flag = 0);
   ///
   /// @ingroup ge
   /// @brief: Build single Op into model buff.
@@ -100,7 +102,7 @@ class GE_FUNC_VISIBILITY GeGenerator {
                        ge::ModelBufferData &model, bool is_offline = true);
   Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
                        const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
-                       bool is_offline = true);
+                       bool is_offline = true, int32_t compile_flag = 0);
   bool CheckNoAicore(const ComputeGraphPtr &graph);
   void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);
   Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs);
diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h
index 54c9ab4a..84f6ef46 100644
--- a/inc/framework/omg/omg_inner_types.h
+++ b/inc/framework/omg/omg_inner_types.h
@@ -123,6 +123,7 @@ struct OmgContext {
   bool need_multi_batch = false;
   std::vector<NodePtr> data_nodes;
   std::vector<NodePtr> getnext_nosink_nodes;
+  bool fuzz_compile_flag = false;
 };
 }  // namespace ge
 
diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt
index 0bc9a6e1..93d5f154 100755
--- a/tests/ut/ge/CMakeLists.txt
+++ b/tests/ut/ge/CMakeLists.txt
@@ -278,6 +278,7 @@ set(COMMON_SRC_FILES
     "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc"
     "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc"
     "${GE_CODE_DIR}/ge/graph/passes/buffer_pool_memory_pass.cc"
+	"${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc"
     "${GE_CODE_DIR}/ge/model/ge_model.cc"
     "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc"
     "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc"
@@ -708,6 +709,8 @@ set(PASS_TEST_FILES
     "graph/passes/transpose_transdata_pass_unittest.cc"
     "graph/passes/parallel_group_pass_unittest.cc"
     "graph/passes/buffer_pool_memory_pass_unittest.cc"
+	"graph/passes/mark_node_unknown_shape_pass_unittest.cc"
+	"graph/passes/reshape_recovery_pass_unittest.cc"
 )
 
 set(KERNEL_TEST_FILES
@@ -799,6 +802,7 @@ set(SINGLE_OP_TEST_FILES
     "single_op/single_op_manager_unittest.cc"
     "single_op/stream_resource_unittest.cc"
     "single_op/single_op_task_unittest.cc"
+	"single_op/single_op_unittest.cc"
 )
 
 set(PROFILING_MNG_TEST_FILES
diff --git a/tests/ut/ge/generator/ge_generator_unittest.cc b/tests/ut/ge/generator/ge_generator_unittest.cc
index fef90ee5..fb256c7c 100644
--- a/tests/ut/ge/generator/ge_generator_unittest.cc
+++ b/tests/ut/ge/generator/ge_generator_unittest.cc
@@ -45,6 +45,15 @@ ComputeGraphPtr MakeGraph() {
   builder.AddDataEdge(data, 0, addn1, 0);
   return builder.GetGraph();
 }
+
+static GeAttrValue::NamedAttrs CreateNamedAttrs(const string &name, std::map<string, GeAttrValue> map) {
+  GeAttrValue::NamedAttrs named_attrs;  // named `name`, filled with every entry of `map`
+  named_attrs.SetName(name);
+  for (const auto &attr : map) {  // bind by reference: no per-entry copy of key and value
+    named_attrs.SetAttr(attr.first, attr.second);
+  }
+  return named_attrs;
+}
 }  // namespace
 
 /*
@@ -85,25 +94,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) {
   GeGenerator generator;
   generator.Initialize({});
   ModelBufferData model_buffer;
-  EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED);
-}
-
-TEST_F(UtestGeGenerator, test_singleop_fuzz_build) {
-  GeTensorDesc tensor_desc;
-  shared_ptr<OpDesc> op_desc = make_shared<OpDesc>("Add", "add");
-  op_desc->AddInputDesc(tensor_desc);
-  op_desc->AddInputDesc(tensor_desc);
-  op_desc->AddOutputDesc(tensor_desc);
-
-  GeTensor tensor(tensor_desc);
-  const vector<GeTensor> inputs = { tensor, tensor };
-  const vector<GeTensor> outputs = { tensor };
-
-  GeGenerator generator;
-  generator.Initialize({});
-  ModelBufferData model_buffer;
-  bool compile_flag = true;
-  EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), SUCCESS);
+  EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED);
 }
 
 TEST_F(UtestGeGenerator, test_check_aicore) {
diff --git a/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc b/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc
new file mode 100644
index 00000000..5157e510
--- /dev/null
+++ b/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc
@@ -0,0 +1,115 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#define private public
+#include "graph/passes/mark_node_unknown_shape_pass.h"
+
+#include "common/ge_inner_error_codes.h"
+#include "inc/pass_manager.h"
+#include "graph/common/local_context.h"
+#undef private
+
+namespace ge {
+class UtestMarkNodeUnknownShapePass : public testing::Test {
+ protected:
+  void SetUp() override {}
+  void TearDown() override { GetLocalOmgContext().fuzz_compile_flag = false; }  // restore the flag tests set
+ public:
+  NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) {
+    GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);  // descriptor shared by every input and output
+    auto op_desc = std::make_shared<OpDesc>(name, type);
+    for (uint32_t i = 0; i < in_num; ++i) {  // unsigned index: matches in_num, no sign-compare warning
+      op_desc->AddInputDesc(test_desc);
+    }
+    for (uint32_t i = 0; i < out_num; ++i) {
+      op_desc->AddOutputDesc(test_desc);
+    }
+    return graph->AddNode(op_desc);
+  }
+///    netoutput1
+///        |
+///       conv1
+///     \       /
+///        data
+  void make_graph(const ComputeGraphPtr &graph) {
+    GetLocalOmgContext().fuzz_compile_flag = true;  // reset again in TearDown()
+    auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D");
+    {
+      auto data1 = MakeNode(graph, 1, 1, "data", "Data");
+      GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
+      data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
+      data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
+      GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
+      GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
+    }
+
+    conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine");
+    AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true);
+    auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
+    GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
+  }
+};
+
+TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) {
+  auto compute_graph = std::make_shared<ge::ComputeGraph>("default");
+  auto mul_desc = std::make_shared<OpDesc>("Mul", MATMUL);
+  mul_desc->SetOpKernelLibName("GE");  // the only node is assigned to the "GE" kernel lib
+  compute_graph->AddNode(mul_desc);
+  PassManager pass_manager;
+  pass_manager.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass);
+  EXPECT_EQ(pass_manager.Run(compute_graph), SUCCESS);
+}
+
+TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) {
+  auto compute_graph = std::make_shared<ge::ComputeGraph>("default");
+  auto mul_desc = std::make_shared<OpDesc>("Mul", MATMUL);
+  mul_desc->SetOpKernelLibName("AIcoreEngine");  // AI Core node, but no fuzz-build result attribute is set
+  compute_graph->AddNode(mul_desc);
+  GetLocalOmgContext().fuzz_compile_flag = true;
+  PassManager pass_manager;
+  pass_manager.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass);
+  EXPECT_EQ(pass_manager.Run(compute_graph), SUCCESS);
+}
+
+TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) {
+  ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
+  make_graph(graph);
+  PassManager pass;
+  pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass);
+  EXPECT_EQ(pass.Run(graph), SUCCESS);
+  EXPECT_EQ(graph->GetAllNodes().size(), 3);
+  for (const auto &node : graph->GetAllNodes()) {
+    if (node->GetName() == "conv1") {
+      auto op_desc = node->GetOpDesc();
+      ASSERT_NE(op_desc, nullptr);  // fatal on purpose: op_desc is dereferenced right below
+      for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
+        auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
+        EXPECT_TRUE(input_desc != nullptr && input_desc->GetShape().GetDim(0) == -2);
+      }
+      for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
+        ASSERT_NE(output_desc, nullptr);  // fatal on purpose: dereferenced on the next line
+        EXPECT_TRUE(output_desc->GetShape().GetDim(0) == -2);
+      }
+    }
+  }
+}
+
+}  // namespace ge
diff --git a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc
new file mode 100644
index 00000000..af60021c
--- /dev/null
+++ b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc
@@ -0,0 +1,69 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "graph/passes/reshape_recovery_pass.h"
+
+#include <gtest/gtest.h>
+#include <set>
+#include <string>
+
+#include "graph_builder_utils.h"
+
+namespace ge {
+class UtestReshapeRecoveryPass : public testing::Test {
+ protected:
+  void SetUp() override {}     // no per-test setup
+  void TearDown() override {}  // no per-test cleanup
+};
+
+namespace {
+///    netoutput1
+///     |        \.
+///transdata1    \.
+///    |          \.
+///    |   transdata2
+///    |        /
+///   var1   const1
+ut::GraphBuilder Graph1Builder() {
+  ut::GraphBuilder builder = ut::GraphBuilder("g2");
+  auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1});
+  auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224});
+  auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224});
+  auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224});
+  auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0);
+
+  builder.AddDataEdge(var1, 0, transdata1, 0);
+  builder.AddDataEdge(const1, 0, transdata2, 0);
+  builder.AddDataEdge(transdata2, 0, netoutput1, 1);
+  builder.AddDataEdge(transdata1, 0, netoutput1, 0);
+
+  return builder;
+}
+}  // namespace
+
+TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) {
+  auto graph_builder = Graph1Builder();
+  auto compute_graph = graph_builder.GetGraph();
+  EXPECT_EQ(compute_graph->GetDirectNodesSize(), 5);  // the five nodes added by Graph1Builder()
+  ReshapeRecoveryPass pass;
+  EXPECT_EQ(pass.Run(compute_graph), SUCCESS);
+  // The direct-node count is expected to grow by three once the pass has run.
+  EXPECT_EQ(compute_graph->GetDirectNodesSize(), 8);
+
+  // A node with the pass-generated name below must now exist in the graph.
+  EXPECT_NE(compute_graph->FindNode("Reshape_ReshapeRecoveryPass_0"), nullptr);
+}
+}  // namespace ge
diff --git a/tests/ut/ge/single_op/single_op_unittest.cc b/tests/ut/ge/single_op/single_op_unittest.cc
new file mode 100644
index 00000000..8c2f6e51
--- /dev/null
+++ b/tests/ut/ge/single_op/single_op_unittest.cc
@@ -0,0 +1,163 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <vector>
+
+#include "runtime/rt.h"
+
+#define protected public
+#define private public
+#include "single_op/single_op.h"
+#include "single_op/single_op_manager.h"
+#undef private
+#undef protected
+
+using namespace std;
+using namespace ge;
+
+class UtestSingleOp : public testing::Test {
+ protected:
+  void SetUp() override {}     // no per-test setup
+  void TearDown() override {}  // no per-test cleanup
+};
+
+TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) {
+  uintptr_t resource_id = 0;
+  std::mutex stream_mu;
+  rtStream_t stream = nullptr;
+  rtStreamCreate(&stream, 0);
+  DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream);
+
+  vector<int64_t> dims_vec_0 = {2};
+  vector<GeTensorDesc> input_desc;
+  GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32);
+  // input data from device
+  AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0);
+  input_desc.emplace_back(tensor_desc_0);
+
+  char input_data[4] = {};  // automatic storage: the previous `new char[4]` was never freed
+  ge::DataBuffer data_buffer;
+  data_buffer.data = input_data;
+  data_buffer.length = sizeof(input_data);
+  vector<DataBuffer> input_buffers = {data_buffer};
+
+  vector<GeTensorDesc> output_desc;
+  vector<DataBuffer> output_buffers;
+
+  // Expected to fail (original note: "UpdateRunInfo failed"); no op task is set here -- TODO confirm failing step
+  EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID);
+}
+
+TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) {
+  uintptr_t resource_id = 0;
+  std::mutex stream_mu;
+  rtStream_t stream = nullptr;
+  rtStreamCreate(&stream, 0);
+  DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream);
+  dynamic_single_op.num_inputs_ = 1;
+
+  vector<int64_t> dims_vec_0 = {2};
+  vector<GeTensorDesc> input_desc;
+  GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32);
+  // input data from host
+  AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1);
+  input_desc.emplace_back(tensor_desc_0);
+
+  int64_t input_size = 0;
+  EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS);
+  EXPECT_EQ(input_size, 64);
+  EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr);
+
+  char input_data[4] = {};  // automatic storage: the previous `new char[4]` was never freed
+  ge::DataBuffer data_buffer;
+  data_buffer.data = input_data;
+  data_buffer.length = sizeof(input_data);
+  vector<DataBuffer> input_buffers = {data_buffer};
+
+  vector<GeTensorDesc> output_desc;
+  vector<DataBuffer> output_buffers;
+
+  auto *tbe_task = new (std::nothrow) TbeOpTask();
+  ASSERT_NE(tbe_task, nullptr);  // nothrow new may return null; the pointer is dereferenced below
+  ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL);
+  ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default");
+  tbe_task->node_ = graph->AddNode(op_desc);
+
+  dynamic_single_op.op_task_.reset(tbe_task);  // op_task_ takes ownership; implicit upcast to OpTask *
+
+  OpDescPtr desc_ptr = MakeShared<OpDesc>("name1", "type1");
+  EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS);
+  dynamic_single_op.op_task_->op_desc_ = desc_ptr;
+  // UpdateRunInfo failed
+  EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID);
+}
+
+
+TEST_F(UtestSingleOp, test_singleop_execute_async1) {
+  StreamResource res(1);  // automatic storage: the previous heap-allocated resource was never deleted
+  std::mutex stream_mu;
+  rtStream_t stream = nullptr;
+  rtStreamCreate(&stream, 0);
+  SingleOp single_op(&res, &stream_mu, stream);  // declared after res, so it is destroyed before res
+
+  char input_data[4] = {};  // automatic storage: the previous `new char[4]` was never freed
+  ge::DataBuffer data_buffer;
+  data_buffer.data = input_data;
+  data_buffer.length = sizeof(input_data);
+  data_buffer.placement = 1;
+  vector<DataBuffer> input_buffers = {data_buffer};
+  vector<DataBuffer> output_buffers;
+
+  single_op.input_sizes_.emplace_back(4);
+  SingleOpModelParam model_params;
+  single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params));
+  single_op.args_.resize(1);
+  EXPECT_EQ(single_op.hybrid_model_executor_, nullptr);
+  EXPECT_EQ(single_op.running_param_->mem_base, nullptr);
+  EXPECT_EQ(single_op.tasks_.size(), 0);
+  EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS);
+}
+
+TEST_F(UtestSingleOp, test_singleop_execute_async2) {
+  StreamResource res(1);  // automatic storage: the previous heap-allocated resource was never deleted
+  std::mutex stream_mu;
+  rtStream_t stream = nullptr;
+  rtStreamCreate(&stream, 0);
+  SingleOp single_op(&res, &stream_mu, stream);  // declared after res, so it is destroyed before res
+
+  char input_data[4] = {};  // automatic storage: the previous `new char[4]` was never freed
+  ge::DataBuffer data_buffer;
+  data_buffer.data = input_data;
+  data_buffer.length = sizeof(input_data);
+  data_buffer.placement = 1;
+  vector<DataBuffer> input_buffers = {data_buffer};
+  vector<DataBuffer> output_buffers;
+
+  single_op.input_sizes_.emplace_back(4);
+  SingleOpModelParam model_params;
+  single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params));
+  single_op.args_.resize(1);
+
+  GeTensorDesc tensor_desc(GeShape({1}), FORMAT_NHWC, DT_UINT64);
+  single_op.inputs_desc_.emplace_back(tensor_desc);
+  std::shared_ptr<ge::GeRootModel> root_model = ge::MakeShared<ge::GeRootModel>();
+  single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
+  single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), 0, stream));
+  EXPECT_EQ(single_op.running_param_->mem_base, nullptr);
+  EXPECT_EQ(single_op.tasks_.size(), 0);
+  EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID);
+}
\ No newline at end of file