!1300 Update GE commit id

From: @shenwei41
Reviewed-by: @xsmq,@liucunwei
Signed-off-by: @liucunwei
pull/1300/MERGE
mindspore-ci-bot committed 4 years ago via Gitee
commit d9d99c3cf5

.gitmodules (vendored): 4 changed lines

@ -1,8 +1,8 @@
[submodule "parser"]
path = parser
url = https://gitee.com/ascend/parser.git
branch = master
branch = r1.3.0
[submodule "metadef"]
path = metadef
url = https://gitee.com/ascend/metadef.git
branch = master
branch = r1.3.0

@ -937,6 +937,10 @@ add_library(atc_stub_ge_compiler SHARED
add_dependencies(atc_stub_ge_compiler ge_stub)
target_compile_options(atc_stub_ge_compiler PRIVATE
-fno-common
)
target_link_libraries(atc_stub_ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
)

@ -171,17 +171,17 @@ Status GEInitialize(const std::map<AscendString, AscendString> &options) {
// GE finalize, releasing all resources
Status GEFinalize() {
ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize);
GELOGT(TRACE_INIT, "GEFinalize start");
ErrorManager::GetInstance().GenWorkStreamIdDefault();
std::lock_guard<std::mutex> lock(g_ge_release_mutex);
// check init status
if (!g_ge_initialized) {
GELOGW("GEFinalize is called before GEInitialize");
GELOGW("[FINAL][FINAL]GEFinalize is called before GEInitialize");
return SUCCESS;
}
std::lock_guard<std::mutex> lock(g_ge_release_mutex);
ErrorManager::GetInstance().SetStage(ErrorMessage::kFinalize, ErrorMessage::kFinalize);
ErrorManager::GetInstance().GenWorkStreamIdDefault();
GELOGT(TRACE_INIT, "GEFinalize start");
// call Finalize
Status ret = SUCCESS;
Status middle_ret;
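
Aside: the reordered body above checks g_ge_initialized before taking g_ge_release_mutex, so a stray GEFinalize returns early without contending on the lock. A minimal standalone sketch of that guard pattern (illustrative names, not the GE API):

#include <iostream>
#include <mutex>

// Minimal sketch of the reordered finalize guard (illustrative names).
namespace {
bool g_initialized = false;   // set by Initialize(), cleared by Finalize()
std::mutex g_release_mutex;   // serializes the release path
}

int Finalize() {
  // Fast path: bail out before taking the lock if Initialize() never ran.
  if (!g_initialized) {
    std::cerr << "Finalize called before Initialize\n";
    return 0;  // treated as success, as in GEFinalize
  }
  std::lock_guard<std::mutex> lock(g_release_mutex);
  // ... release resources here ...
  g_initialized = false;
  return 0;
}

int main() { Finalize(); }  // logs the warning, returns success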

@ -212,6 +212,7 @@ target_link_libraries(ge_executor PRIVATE
add_library(ge_executor_shared SHARED ${SRC_LIST} ${PROTO_HDRS})
target_compile_options(ge_executor_shared PRIVATE
-fno-common
-Werror
-O2
-Wno-deprecated-declarations

@ -38,6 +38,7 @@ REGISTER_OP_CREATOR(ExpandDims, GeDeletedOp);
REGISTER_OP_CREATOR(Reshape, GeDeletedOp);
REGISTER_OP_CREATOR(ReFormat, GeDeletedOp);
REGISTER_OP_CREATOR(Squeeze, GeDeletedOp);
REGISTER_OP_CREATOR(Unsqueeze, GeDeletedOp);
REGISTER_OP_CREATOR(Size, GeDeletedOp);
REGISTER_OP_CREATOR(Shape, GeDeletedOp);
REGISTER_OP_CREATOR(ShapeN, GeDeletedOp);
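
Aside: REGISTER_OP_CREATOR-style registration usually works by having the macro define a static registrar whose constructor inserts a factory into a global map before main runs. A hedged sketch of that mechanism (illustrative types; the real GE macro differs in detail):

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

// A static registrar inserts a factory into a global map at program start.
struct Op { virtual ~Op() = default; };
struct GeDeletedOp : Op {};

using OpCreator = std::function<std::unique_ptr<Op>()>;

std::map<std::string, OpCreator> &OpRegistry() {
  static std::map<std::string, OpCreator> registry;  // constructed on first use
  return registry;
}

struct OpRegistrar {
  OpRegistrar(const std::string &type, OpCreator creator) {
    OpRegistry()[type] = std::move(creator);
  }
};

#define REGISTER_OP_CREATOR(type, clazz) \
  static OpRegistrar g_registrar_##type(#type, [] { return std::unique_ptr<Op>(new clazz()); })

REGISTER_OP_CREATOR(Unsqueeze, GeDeletedOp);  // same shape as the hunk above

int main() {
  std::cout << "registered ops: " << OpRegistry().size() << "\n";  // 1
}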

@ -16,14 +16,12 @@
#include "ge_runtime/task/label_goto_task.h"
#include "ge_runtime/task/task_factory.h"
#include "framework/common/util.h"
namespace ge {
namespace model_runner {
LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::shared_ptr<LabelGotoTaskInfo> &task_info)
: TaskRepeater<LabelGotoTaskInfo>(model_context, task_info),
task_info_(task_info),
stream_(nullptr),
label_(nullptr) {
: TaskRepeater<LabelGotoTaskInfo>(model_context, task_info), task_info_(task_info) {
if (task_info_ == nullptr) {
GELOGW("task_info_ is null!");
return;
@ -42,29 +40,78 @@ LabelGotoTask::LabelGotoTask(const ModelContext &model_context, const std::share
label_ = label_list[label_id];
}
LabelGotoTask::~LabelGotoTask() {}
LabelGotoTask::~LabelGotoTask() {
GE_FREE_RT_LOG(label_info_);
GE_FREE_RT_LOG(index_value_);
}
bool LabelGotoTask::Distribute() {
GELOGI("LabelGotoTask Distribute start.");
if (!CheckParamValid()) {
return false;
}
const std::vector<void *> label_list = { label_ };
rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
uint64_t branch_index = 0;
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
uint32_t label_info_size = sizeof(rtLabelDevInfo) * label_list.size();
rt_ret = rtMalloc(&label_info_, label_info_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
rt_ret = rtLabelListCpy(label_list.data(), label_list.size(), label_info_, label_info_size);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
rt_ret = rtLabelSwitchByIndex(index_value_, label_list.size(), label_info_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: %#x", rt_ret);
return false;
}
GELOGI("DistributeTask end.");
return true;
}
bool LabelGotoTask::CheckParamValid() {
if (stream_ == nullptr) {
GELOGE(PARAM_INVALID, "stream is null!");
return false;
}
if (label_ == nullptr) {
GELOGE(PARAM_INVALID, "label is null!");
return false;
}
rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
if (label_info_ != nullptr) {
GELOGE(PARAM_INVALID, "label_info_ has dirty data.");
return false;
}
if (index_value_ != nullptr) {
GELOGE(PARAM_INVALID, "index_value_ has dirty data.");
return false;
}
GELOGI("DistributeTask end.");
return true;
}
REGISTER_TASK(TaskInfoType::LABEL_GOTO, LabelGotoTask, LabelGotoTaskInfo);
} // namespace model_runner
} // namespace ge
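
Aside: the rewritten Distribute builds an unconditional goto out of the switch-by-index primitive: device memory holds a branch index pinned to 0 and a one-entry label table, so the switch always jumps to the single label. A standalone sketch with stubbed rt* stand-ins (simplified signatures, not the real runtime):

#include <cstdint>
#include <cstring>
#include <iostream>
#include <new>
#include <vector>

// Stub stand-ins for the Ascend runtime so the sketch compiles alone.
using rtError_t = int;
constexpr rtError_t RT_ERROR_NONE = 0;
struct rtLabelDevInfo { void *label; };

rtError_t rtMalloc(void **ptr, std::size_t size) { *ptr = ::operator new(size); return RT_ERROR_NONE; }
rtError_t rtMemcpy(void *dst, std::size_t, const void *src, std::size_t n) {
  std::memcpy(dst, src, n);
  return RT_ERROR_NONE;
}
rtError_t rtLabelListCpy(void *const *labels, std::size_t n, void *dst, std::size_t) {
  auto *info = static_cast<rtLabelDevInfo *>(dst);
  for (std::size_t i = 0; i < n; ++i) info[i].label = labels[i];  // device copy in reality
  return RT_ERROR_NONE;
}
rtError_t rtLabelSwitchByIndex(void *index, std::size_t branch_num, void * /*table*/, void * /*stream*/) {
  std::cout << "jump to branch " << *static_cast<uint64_t *>(index) << " of " << branch_num << "\n";
  return RT_ERROR_NONE;
}

// An unconditional goto: one-entry label table plus an index pinned to 0,
// so the switch always takes the single branch (frees omitted for brevity).
bool DistributeGoto(void *label, void *stream) {
  const std::vector<void *> label_list = {label};
  void *index_value = nullptr;
  if (rtMalloc(&index_value, sizeof(uint64_t)) != RT_ERROR_NONE) return false;
  uint64_t branch_index = 0;
  rtMemcpy(index_value, sizeof(uint64_t), &branch_index, sizeof(uint64_t));
  std::size_t info_size = sizeof(rtLabelDevInfo) * label_list.size();
  void *label_info = nullptr;
  if (rtMalloc(&label_info, info_size) != RT_ERROR_NONE) return false;
  rtLabelListCpy(label_list.data(), label_list.size(), label_info, info_size);
  return rtLabelSwitchByIndex(index_value, label_list.size(), label_info, stream) == RT_ERROR_NONE;
}

int main() {
  int dummy_label = 0;
  DistributeGoto(&dummy_label, nullptr);  // prints: jump to branch 0 of 1
}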

@ -31,9 +31,13 @@ class LabelGotoTask : public TaskRepeater<LabelGotoTaskInfo> {
bool Distribute() override;
private:
bool CheckParamValid();
std::shared_ptr<LabelGotoTaskInfo> task_info_;
void *stream_;
void *label_;
void *stream_{nullptr};
void *label_{nullptr};
void *label_info_{nullptr};
void *index_value_{nullptr};
};
} // namespace model_runner
} // namespace ge
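
Aside: the header now relies on in-class default member initializers instead of a long constructor initializer list; a small illustration of why that keeps additional constructors cheap (illustrative class, not the GE type):

// In-class defaults: every constructor that does not mention a field
// still leaves it initialized.
class LabelTask {
 public:
  LabelTask() = default;                                 // all four start as nullptr
  explicit LabelTask(void *stream) : stream_(stream) {}  // label_ etc. still nullptr

 private:
  void *stream_{nullptr};
  void *label_{nullptr};
  void *label_info_{nullptr};
  void *index_value_{nullptr};
};

int main() {
  LabelTask a;            // compiles without a hand-written default ctor
  LabelTask b(nullptr);
  (void)a; (void)b;
}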

@ -50,9 +50,13 @@ const char *const kFileNameSuffix = "online";
const char *const kAicpuAllshape = "_AllShape";
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
const int64_t kDynamicDimValue = -2;
const int kDefaultDeviceId = 0;
const int kDefaultJobId = 0;
std::map<ge::OpEngineType, std::string> engine_type_map{
{ge::ENGINE_SYS, kEngineNameDefault}, {ge::ENGINE_AICORE, kAIcoreEngine}, {ge::ENGINE_VECTOR, kVectorEngine}};
{ge::ENGINE_SYS, kEngineNameDefault},
{ge::ENGINE_AICORE, kAIcoreEngine},
{ge::ENGINE_VECTOR, kVectorEngine}};
bool ContainsDynamicInpus(const ge::OpDesc &op_desc) {
for (auto &tensor_desc : op_desc.GetAllInputsDescPtr()) {
@ -83,8 +87,9 @@ static Status CheckEngineTypeSupport(const NodePtr &node, OpEngineType engine_ty
} else {
ErrorManager::GetInstance().ATCReportErrMessage("E14001", {"opname", "optype", "value", "reason"},
{op_desc->GetName(), op_desc->GetType(), "engine type",
"it only support kEngineNameDefault/kAIcoreEngine/kVectorEngine"});
GELOGE(FAILED, "CheckEngineType: engine type: %d not support", static_cast<int>(engine_type));
"it only support default/AIcoreEngine/VectorEngine"});
GELOGE(FAILED, "[Check][EngineType]value:%d not support, "
"only support default/AIcoreEngine/VectorEngine now", static_cast<int>(engine_type));
return FAILED;
}
@ -188,17 +193,20 @@ static Status AddInputs(const ComputeGraphPtr &graph, const NodePtr &node, const
(void)AttrUtils::SetBool(data_op, "_is_single_op", true);
GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail.");
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail.");
GE_CHK_BOOL_EXEC(data_op->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][InputDesc]fail for node:%s", data_op->GetName().c_str());
GE_CHK_BOOL_EXEC(data_op->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][OutputDesc]fail for node:%s", data_op->GetName().c_str());
if (attr) {
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED, "Set index fail.");
GE_CHK_BOOL_EXEC(AttrUtils::SetInt(data_op, ATTR_NAME_INDEX, index), return FAILED,
"[Set][Attr:%s]fail for node:%s", ATTR_NAME_INDEX.c_str(), data_op->GetName().c_str());
}
ge::NodePtr arg_node = graph->AddNode(data_op);
GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail.");
GE_CHK_BOOL_EXEC(arg_node != nullptr, return FAILED, "Insert Data node fail");
GE_CHK_STATUS(GraphUtils::AddEdge(arg_node->GetOutDataAnchor(0), node->GetInDataAnchor(index)),
"Add edge[%s->%s] fail.", data_op->GetName().c_str(), node->GetName().c_str());
"[Add][Edge]fail from node:%s to node:%s", data_op->GetName().c_str(), node->GetName().c_str());
return SUCCESS;
}
@ -213,20 +221,23 @@ static Status AddOutputs(const ComputeGraphPtr &graph, const NodePtr &node, cons
for (const auto &out_desc : outputs) {
GeTensorDesc tensor = out_desc.GetTensorDesc();
TensorUtils::SetInputTensor(tensor, true);
GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add input desc fail");
GE_CHK_BOOL_EXEC(op_desc->AddInputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][InputDesc]fail for node:%s", op_desc->GetName().c_str());
TensorUtils::SetInputTensor(tensor, false);
TensorUtils::SetOutputTensor(tensor, true);
GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED, "Add output desc fail");
GE_CHK_BOOL_EXEC(op_desc->AddOutputDesc(tensor) == GRAPH_SUCCESS, return FAILED,
"[Add][OutputDesc]fail for node:%s", op_desc->GetName().c_str());
count++;
}
GE_CHECK_NOTNULL_EXEC(graph, return PARAM_INVALID);
ge::NodePtr out_node = graph->AddNode(op_desc);
GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED, "Insert Output node fail.");
GE_CHK_BOOL_EXEC(out_node != nullptr, return FAILED,
"[Add][Node:%s]fail in graph:%u", op_desc->GetName().c_str(), graph->GetGraphID());
GE_CHECK_NOTNULL_EXEC(node, return PARAM_INVALID);
for (int32_t i = 0; i < count; ++i) {
GE_CHK_STATUS(GraphUtils::AddEdge(node->GetOutDataAnchor(i), out_node->GetInDataAnchor(i)),
"Add edge[%s->%s] fail.", node->GetName().c_str(), out_node->GetName().c_str());
"[Add][Edge]fail from node:%s to node:%s", node->GetName().c_str(), out_node->GetName().c_str());
}
return SUCCESS;
@ -710,7 +721,7 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
auto node = comp_graph->FindNode(op_desc->GetName());
Status ret = CheckEngineTypeSupport(node, engine_type);
if (ret != SUCCESS) {
GELOGE(ret, "check engine type failed.");
GELOGE(ret, "[Check][EngineType]value:%d for node:%s not supported", engine_type, node->GetName().c_str());
return ret;
}
}
@ -915,6 +926,13 @@ Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor>
static std::atomic<uint64_t> atomic_session_id(0);
auto session_id = atomic_session_id.fetch_add(1);
// This is a temporary add for graph with variable
auto version = static_cast<int32_t>(SessionVersion::ClOUD_VERSION);
GELOGI("Start init var instance, session_id %lu", session_id);
ret = VarManager::Instance(session_id)->Init(version, session_id, kDefaultDeviceId, kDefaultJobId);
if (ret != SUCCESS) {
GELOGW("Failed to init var instance, session_id %lu", session_id);
}
if (is_singleop_unregistered_) {
ret = graph_manager_.BuildGraphForUnregisteredOp(graph_id, inputs, ge_root_model, session_id);
} else {
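
Aside: session ids here come from a single fetch_add on a function-local atomic, which yields unique, monotonically increasing ids without a lock. The same pattern in isolation:

#include <atomic>
#include <cstdint>
#include <iostream>

// One fetch_add on a function-local atomic: unique, monotonically
// increasing session ids with no lock.
uint64_t NextSessionId() {
  static std::atomic<uint64_t> atomic_session_id(0);
  return atomic_session_id.fetch_add(1);  // returns the pre-increment value
}

int main() {
  std::cout << NextSessionId() << " " << NextSessionId() << "\n";  // 0 1
}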

@ -400,6 +400,10 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor
}
static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
if (graph->GetGraphUnknownFlag()) {
GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str());
return SUCCESS;
}
for (auto &node : graph->GetDirectNode()) {
// CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
auto op_desc = node->GetOpDesc();

@ -33,13 +33,21 @@ using std::queue;
namespace ge {
LogicalStreamPass::LogicalStreamPass(const string &name) : name_(name) {}
const string &LogicalStreamPass::GetName() const { return name_; }
const string &LogicalStreamPass::GetName() const {
return name_;
}
bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const { return subgraph.engine_conf.skip_assign_stream; }
bool LogicalStreamPass::IsEngineSkip(const Subgraph &subgraph) const {
return subgraph.engine_conf.skip_assign_stream;
}
bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const { return subgraph.engine_conf.attach; }
bool LogicalStreamPass::IsEngineAttach(const Subgraph &subgraph) const {
return subgraph.engine_conf.attach;
}
bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const { return subgraph.engine_conf.independent; }
bool LogicalStreamPass::IsEngineIndependent(const Subgraph &subgraph) const {
return subgraph.engine_conf.independent;
}
bool LogicalStreamPass::HasStreamLabel(const Subgraph &subgraph) const {
return !subgraph.subgraph_info.GetStreamLabel().empty();
@ -60,14 +68,14 @@ Status AssignByLabelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPtr> &
// Subgraphs of the same stream_label are assigned to the same stream,
// and different stream_labels are assigned new streams.
auto iter = label_streams.find(stream_label);
if (iter != label_streams.end()) {
subgraph->stream_id = iter->second;
} else {
if (iter == label_streams.end()) {
subgraph->stream_id = next_stream;
GELOGI("Assign new stream %ld for label %s.", next_stream, stream_label.c_str());
label_streams.emplace(stream_label, next_stream);
++next_stream;
next_stream++;
} else {
subgraph->stream_id = iter->second;
}
changed = true;
}
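
Aside: both label passes share the same allocation idiom: look the label up in a map, hand out a fresh stream id on a miss, and reuse the stored id on a hit. Extracted into a self-contained helper:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Miss: allocate a fresh stream id and remember it. Hit: reuse the id
// assigned to the same label earlier.
int64_t StreamForLabel(const std::string &label,
                       std::map<std::string, int64_t> &label_streams,
                       int64_t &next_stream) {
  auto iter = label_streams.find(label);
  if (iter == label_streams.end()) {
    int64_t assigned = next_stream;
    label_streams.emplace(label, assigned);
    next_stream++;
    return assigned;
  }
  return iter->second;
}

int main() {
  std::map<std::string, int64_t> label_streams;
  int64_t next_stream = 0;
  std::cout << StreamForLabel("loop", label_streams, next_stream)    // 0
            << StreamForLabel("branch", label_streams, next_stream)  // 1
            << StreamForLabel("loop", label_streams, next_stream)    // 0 again
            << "\n";
}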
@ -92,15 +100,15 @@ Status IndependentStreamPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt
const string &stream_label = subgraph->subgraph_info.GetStreamLabel();
auto &label_streams = engine_streams[engine];
auto iter = label_streams.find(stream_label);
if (iter != label_streams.end()) {
subgraph->stream_id = iter->second;
} else {
if (iter == label_streams.end()) {
subgraph->stream_id = next_stream;
GELOGI("Assign new independent stream %ld for engine %s (label: %s).", next_stream, engine.c_str(),
stream_label.c_str());
label_streams.emplace(stream_label, next_stream);
++next_stream;
next_stream++;
} else {
subgraph->stream_id = iter->second;
}
changed = true;
}
@ -121,7 +129,9 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP
}
SubgraphPtr reusable_subgraph = GetReusableSubgraph(subgraph, end_subgraph_map, pld_subgraph_map);
if (reusable_subgraph != nullptr) {
if (reusable_subgraph == nullptr) {
(void)AssignNewStream(subgraph);
} else {
if (HasAssignedStream(*reusable_subgraph)) {
subgraph->stream_id = reusable_subgraph->stream_id;
} else {
@ -140,8 +150,6 @@ Status AssignByDependencyPass::Run(ComputeGraphPtr graph, const vector<SubgraphP
GELOGI("Subgraph %s of engine %s reuses stream of subgraph %s of engine %s.", subgraph->name.c_str(),
subgraph->engine_conf.id.c_str(), reusable_subgraph->name.c_str(),
reusable_subgraph->engine_conf.id.c_str());
} else {
(void)AssignNewStream(subgraph);
}
changed = true;
}
@ -191,13 +199,15 @@ bool AssignByDependencyPass::CouldReuse(const SubgraphPtr &subgraph, const Subgr
auto iter = pld_subgraph_map.find(end_pld_pair.second);
if (iter != pld_subgraph_map.end()) {
const SubgraphPtr &pred_subgraph_succ = iter->second;
if (pred_subgraph_succ != subgraph && pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id) {
if ((pred_subgraph_succ != subgraph) &&
(pred_subgraph_succ->engine_conf.id == pred_subgraph->engine_conf.id)) {
return false;
}
}
}
if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) || IsEngineAttach(*subgraph)) {
if ((subgraph->engine_conf.id == pred_subgraph->engine_conf.id) ||
IsEngineAttach(*subgraph)) {
return true;
}
@ -406,7 +416,7 @@ Status UpdateForSkippedEnginePass::Run(ComputeGraphPtr graph, const vector<Subgr
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
auto stream_id = op_desc->GetStreamId();
if (stream_id != kInvalidStream && !HasStreamLabel(*subgraph)) {
if ((stream_id != kInvalidStream) && !HasStreamLabel(*subgraph)) {
ops_without_label.emplace(op_desc);
}
}
@ -463,7 +473,7 @@ Status AllReduceParallelPass::Run(ComputeGraphPtr graph, const vector<SubgraphPt
for (const NodePtr &node : graph->GetDirectNode()) {
if (!IsHcomNode(node->GetType()) ||
node->GetInDataNodes().size() <= 1) {
(node->GetInDataNodes().size() <= 1)) {
continue;
}
@ -575,7 +585,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap
GE_CHECK_NOTNULL(graph);
NodePtr parent_node = graph->GetParentNode();
if (parent_node == nullptr || parent_node->GetOpDesc() == nullptr) {
if ((parent_node == nullptr) || (parent_node->GetOpDesc() == nullptr)) {
context_.default_stream = kInvalidStream;
} else {
context_.default_stream = parent_node->GetOpDesc()->GetStreamId();
@ -597,7 +607,7 @@ Status LogicalStreamAllocator::DoAssign(const ComputeGraphPtr &graph, const Grap
return status;
}
GELOGD("Subgraphs of graph %s:", graph->GetName().c_str());
GELOGD("Subgraphs of graph %s", graph->GetName().c_str());
for (const auto &subgraph : subgraphs) {
if (subgraph != nullptr) {
GELOGD("subgraph: %s", subgraph->name.c_str());
@ -686,7 +696,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
auto op_desc = node->GetOpDesc();
if (op_desc != nullptr) {
int64_t stream_id = op_desc->GetStreamId();
if (stream_id != kInvalidStream && stream_id < stream_num) {
if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
stream_has_node[stream_id] = true;
}
}
@ -695,10 +705,10 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
context_.next_stream = 0;
vector<int64_t> old_to_new_streams(stream_num, kInvalidStream);
for (size_t old_stream = 0; old_stream < stream_has_node.size(); ++old_stream) {
for (size_t old_stream = 0; old_stream < stream_has_node.size(); old_stream++) {
if (stream_has_node[old_stream]) {
old_to_new_streams[old_stream] = context_.next_stream;
++context_.next_stream;
context_.next_stream++;
}
}
@ -706,7 +716,7 @@ void LogicalStreamAllocator::RefreshContinuousStreams(const ComputeGraphPtr &gra
auto op_desc = node->GetOpDesc();
if (op_desc != nullptr) {
int64_t stream_id = op_desc->GetStreamId();
if (stream_id != kInvalidStream && stream_id < stream_num) {
if ((stream_id != kInvalidStream) && (stream_id < stream_num)) {
op_desc->SetStreamId(old_to_new_streams[stream_id]);
}
}

@ -70,7 +70,10 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
return SUCCESS;
}
if ((all_memory_size.front() <= 0) || (log(kLogBase) == 0)) {
GELOGE(FAILED, "Memory size:%ld is invalid.", all_memory_size.front());
GELOGE(FAILED, "[Check][MemRangeStep]first mem_range_step:%ld less than 0,invalid,"
"maybe has dynamic shape in graph", all_memory_size.front());
REPORT_INNER_ERROR("E19999", "first mem_range_step:%ld less than 0,invalid,"
"maybe has dynamic shape in graph", all_memory_size.front());
return FAILED;
}
// Memory size is 512 aligned, so it is not necessary to take less than 512
@ -81,12 +84,18 @@ Status BinaryBlockMemAssigner::GetMemoryRanges(vector<int64_t> &range_ceils) {
GELOGD("Range number: %zu", range_number);
vector<vector<int64_t>> ranges(range_number);
GE_CHK_BOOL_EXEC((range_number != 0), return PARAM_INVALID, "range_number can't be 0.");
GE_CHK_BOOL_EXEC((range_number != 0),
REPORT_INNER_ERROR("E19999", "inner data [range_number] is 0, which is invalid");
return PARAM_INVALID,
"[Check][RangeNumber]inner data is 0, which is invalid.");
size_t range_number_limit = all_memory_size.size() / range_number;
int64_t range_ceil = min_memory_size;
for (size_t i = 1; i <= range_number; i++) {
GE_IF_BOOL_EXEC(TypeUtils::CheckUint64MulOverflow(static_cast<uint64_t>(range_ceil), kRangeCeilInterval),
GELOGE(FAILED, "Multiply result is out of range.");
GELOGE(FAILED, "[Check][MemRangeCeil]Multiply result is out of range,"
"range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
REPORT_INNER_ERROR("E19999", "process mem_range_ceil,multiply result out of range,"
"range_ceil:%ld, interval:%u", range_ceil, kRangeCeilInterval);
return FAILED);
range_ceil *= kRangeCeilInterval; // The block size of each interval is doubled every time.
for (auto iter = all_memory_size.begin(); iter != all_memory_size.end();) {
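
Aside: the range table grows by repeated multiplication (range_ceil *= kRangeCeilInterval), so each step is overflow-checked first. A standalone version of that doubling-with-guard loop (constants below are assumptions for the sketch):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Each step multiplies the ceiling by the interval, so the product is
// overflow-checked first, mirroring the CheckUint64MulOverflow guard.
bool MulOverflows(uint64_t a, uint64_t b) {
  return b != 0 && a > UINT64_MAX / b;
}

std::vector<int64_t> BuildRangeCeils(int64_t min_size, uint32_t interval, std::size_t steps) {
  std::vector<int64_t> ceils;
  int64_t range_ceil = min_size;
  for (std::size_t i = 0; i < steps; ++i) {
    if (MulOverflows(static_cast<uint64_t>(range_ceil), interval)) break;
    range_ceil *= interval;  // block size doubles per interval when interval == 2
    ceils.push_back(range_ceil);
  }
  return ceils;
}

int main() {
  for (int64_t c : BuildRangeCeils(512, 2, 5)) std::cout << c << " ";  // 1024 2048 4096 8192 16384
  std::cout << "\n";
}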

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -385,7 +385,7 @@ Status DataDumper::DumpRefOutput(const DataDumper::InnerDumpInfo &inner_dump_inf
Status DataDumper::DumpOutputWithTask(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
const auto &output_descs = inner_dump_info.op->GetAllOutputsDesc();
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
if (output_descs.size() != output_addrs.size()) {
GELOGE(PARAM_INVALID, "Invalid output desc addrs size %zu, op %s has %zu output desc.", output_addrs.size(),
inner_dump_info.op->GetName().c_str(), output_descs.size());
@ -436,7 +436,7 @@ Status DataDumper::DumpOutput(const InnerDumpInfo &inner_dump_info, aicpu::dump:
// else data, const or variable op
aicpu::dump::Output output;
auto output_tensor = inner_dump_info.op->GetOutputDescPtr(inner_dump_info.output_anchor_index);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(*runtime_param_, inner_dump_info.op);
if (output_tensor == nullptr) {
GELOGE(PARAM_INVALID, "output_tensor is null, index: %d, size: %zu.", inner_dump_info.output_anchor_index,
inner_dump_info.op->GetOutputsSize());
@ -540,7 +540,7 @@ Status DataDumper::DumpRefInput(const DataDumper::InnerDumpInfo &inner_dump_info
Status DataDumper::DumpInput(const InnerDumpInfo &inner_dump_info, aicpu::dump::Task &task) {
GELOGI("Start dump input");
const auto &input_descs = inner_dump_info.op->GetAllInputsDesc();
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(runtime_param_, inner_dump_info.op);
const std::vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(*runtime_param_, inner_dump_info.op);
if (input_descs.size() != input_addrs.size()) {
GELOGE(PARAM_INVALID, "Invalid input desc addrs size %zu, op %s has %zu input desc.", input_addrs.size(),
inner_dump_info.op->GetName().c_str(), input_descs.size());

@ -36,9 +36,21 @@
namespace ge {
class DataDumper {
public:
DataDumper() : runtime_param_{} {}
explicit DataDumper(const RuntimeParam &rsh) : runtime_param_(rsh) {}
explicit DataDumper(RuntimeParam *rsh)
: model_name_(),
model_id_(0),
runtime_param_(rsh),
dev_mem_load_(nullptr),
dev_mem_unload_(nullptr),
op_list_(),
input_map_(),
load_flag_(false),
device_id_(0),
global_step_(0),
loop_per_iter_(0),
loop_cond_(0),
compute_graph_(nullptr),
ref_info_() {}
~DataDumper();
@ -93,10 +105,10 @@ class DataDumper {
// for inference data dump
std::string om_name_;
uint32_t model_id_ = 0;
const RuntimeParam &runtime_param_;
void *dev_mem_load_ = nullptr;
void *dev_mem_unload_ = nullptr;
uint32_t model_id_;
RuntimeParam *runtime_param_;
void *dev_mem_load_;
void *dev_mem_unload_;
struct InnerDumpInfo;
struct InnerInputMapping;
@ -107,12 +119,12 @@ class DataDumper {
uint32_t end_graph_stream_id_ = 0;
bool is_end_graph_ = false;
std::multimap<std::string, InnerInputMapping> input_map_; // release after DavinciModel::Init
bool load_flag_ = false;
uint32_t device_id_ = 0;
uintptr_t global_step_ = 0;
uintptr_t loop_per_iter_ = 0;
uintptr_t loop_cond_ = 0;
ComputeGraphPtr compute_graph_ = nullptr; // release after DavinciModel::Init
bool load_flag_;
uint32_t device_id_;
uintptr_t global_step_;
uintptr_t loop_per_iter_;
uintptr_t loop_cond_;
ComputeGraphPtr compute_graph_; // release after DavinciModel::Init
std::map<OpDescPtr, void *> ref_info_; // release after DavinciModel::Init
void *l1_fusion_addr_ = nullptr;
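
Aside: switching runtime_param_ from a const reference to a pointer is what allows DataDumper to drop the reference-binding constructor requirement and lets DavinciModel pass &runtime_param_ at the call site. A compact illustration of the difference (illustrative types, not GE's):

// A reference member pins the class: it must be bound in every
// constructor and blocks copy assignment. A pointer member allows a
// default constructor and reseating.
struct RuntimeParam { int graph_id = 0; };

struct RefHolder {
  explicit RefHolder(const RuntimeParam &p) : param_(p) {}
  const RuntimeParam &param_;  // no default ctor, no copy assignment
};

struct PtrHolder {
  PtrHolder() = default;       // fine: pointer defaults to nullptr
  explicit PtrHolder(RuntimeParam *p) : param_(p) {}
  RuntimeParam *param_{nullptr};
};

int main() {
  RuntimeParam rp;
  PtrHolder a;
  PtrHolder b(&rp);
  a = b;             // ok with a pointer member
  // RefHolder r;    // would not compile: the reference must be bound
  (void)a;
}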

@ -31,6 +31,7 @@
#include "common/scope_guard.h"
#include "common/thread_pool.h"
#include "framework/common/debug/ge_log.h"
#include "framework/common/util.h"
#include "graph/common/ge_call_wrapper.h"
#include "graph/compute_graph.h"
#include "graph/debug/ge_attr_define.h"
@ -184,7 +185,7 @@ DavinciModel::DavinciModel(int32_t priority, const std::shared_ptr<ModelListener
last_execute_mode_(INITIALIZATION),
session_id_(0),
device_id_(0),
maxDumpOpNum_(0), data_dumper_(runtime_param_),
maxDumpOpNum_(0), data_dumper_(&runtime_param_),
iterator_count_(0),
is_l1_fusion_enable_(false),
is_first_execute_(true) {
@ -297,6 +298,11 @@ void DavinciModel::ReleaseTask() {
GE_CHK_STATUS(task->Release(), "Release task failed.");
}
}
for (auto &item : label_goto_args_) {
GE_FREE_RT_LOG(item.second.first);
}
label_goto_args_.clear();
}
Status DavinciModel::Assign(const GeModelPtr &ge_model) {
@ -654,12 +660,12 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
runtime_param_.graph_id = compute_graph->GetGraphID();
// op debug register
GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed.");
GE_CHK_STATUS_RET(OpDebugRegister(), "OpDebugRegister failed");
GE_TIMESTAMP_START(TransAllVarData);
GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed.");
GE_CHK_STATUS_RET(TransAllVarData(compute_graph, runtime_param_.graph_id), "TransAllVarData failed");
GE_TIMESTAMP_END(TransAllVarData, "GraphLoader::TransAllVarData");
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed.");
GE_CHK_STATUS_RET(TransVarDataUtils::CopyVarData(compute_graph, session_id_, device_id_), "copy var data failed");
GE_TIMESTAMP_START(InitModelMem);
GELOGD("Known node is %d.", known_node_);
@ -667,7 +673,7 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
if (!known_node_) {
GE_CHK_STATUS_RET_NOLOG(InitFeatureMapAndP2PMem(dev_ptr, mem_size));
data_inputer_ = new (std::nothrow) DataInputer();
GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr.");
GE_CHK_BOOL_RET_STATUS(data_inputer_ != nullptr, MEMALLOC_FAILED, "data_inputer_ is nullptr");
}
fixed_mem_base_ = reinterpret_cast<uintptr_t>(mem_base_);
GE_TIMESTAMP_END(InitModelMem, "GraphLoader::InitModelMem");
@ -1334,6 +1340,39 @@ void DavinciModel::ParseDynamicOutShape(const std::vector<std::string> &str_info
}
}
Status DavinciModel::GetLabelGotoAddr(uint32_t label_index, rtMemType_t mem_type, void *&arg_addr, uint32_t &arg_size) {
std::lock_guard<std::mutex> lock(label_args_mutex_);
auto it = label_goto_args_.find(label_index);
if (it != label_goto_args_.end()) {
arg_addr = it->second.first;
arg_size = it->second.second;
return SUCCESS;
}
if (label_index >= label_list_.size()) {
GELOGE(INTERNAL_ERROR, "Invalid label id:%u, label size:%zu", label_index, label_list_.size());
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(label_list_[label_index]);
vector<rtLabel_t> label_used = { label_list_[label_index] };
arg_size = label_used.size() * sizeof(rtLabelDevInfo);
rtError_t rt_ret = rtMalloc(&arg_addr, arg_size, mem_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
label_goto_args_[label_index] = { arg_addr, arg_size };
rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), arg_addr, arg_size);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtLabelListCpy failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;
}
/// @ingroup ge
/// @brief LabelSet Op Initialize.
/// @param [in] op_desc: LabelSet Op descriptor.
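
Aside: GetLabelGotoAddr is a mutex-guarded memoization: hit the per-label cache under the lock, and only allocate and populate the device buffer on a miss, so repeated gotos to the same label reuse one allocation. The shape of that cache, with a plain allocation standing in for rtMalloc/rtLabelListCpy:

#include <cstdint>
#include <iostream>
#include <map>
#include <mutex>
#include <new>
#include <utility>

// Look up under the lock, allocate and record only on a miss
// (frees omitted for brevity).
class LabelArgCache {
 public:
  std::pair<void *, uint32_t> Get(uint32_t label_index) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto it = cache_.find(label_index);
    if (it != cache_.end()) {
      return it->second;  // hit: reuse the earlier allocation
    }
    uint32_t size = sizeof(uint64_t);   // stand-in for the label-table size
    void *addr = ::operator new(size);  // stand-in for rtMalloc
    cache_[label_index] = {addr, size};
    return {addr, size};
  }

 private:
  std::mutex mutex_;
  std::map<uint32_t, std::pair<void *, uint32_t>> cache_;
};

int main() {
  LabelArgCache cache;
  auto first = cache.Get(3);
  auto second = cache.Get(3);
  std::cout << (first.first == second.first) << "\n";  // 1: same buffer
}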

@ -273,6 +273,8 @@ class DavinciModel {
const vector<rtLabel_t> &GetLabelList() const { return label_list_; }
Status GetLabelGotoAddr(uint32_t label_index, rtMemType_t memory_type, void *&addr, uint32_t &size);
Status DestroyThread();
// get Op
@ -930,6 +932,9 @@ class DavinciModel {
vector<rtLabel_t> label_list_;
set<uint32_t> label_id_indication_;
mutex label_args_mutex_;
map<uint32_t, pair<void *, uint32_t>> label_goto_args_;
mutex outside_addrs_mutex_;
vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
set<const void *> copy_only_addrs_; // Address need copy to original place.

@ -297,12 +297,11 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
if (model_id == INVALID_MODEL_ID) {
GenModelId(&model_id);
}
bool is_shape_unknown = false;
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
string model_name = "";
GE_CHK_STATUS_RET(ge_root_model->CheckIsUnknownShape(is_shape_unknown), "CheckIsUnknownShape failed, model id:%u",
model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
bool is_shape_unknown = ge_root_model->GetRootGraph()->GetGraphUnknownFlag();
// if multi subgraph is known, do hybrid load process
if (is_shape_unknown || GetContext().GetHostExecFlag() || (name_to_model.size() > 1)) {
return DoLoadHybridModelOnline(model_id, model_name, ge_root_model, listener);
}
@ -324,7 +323,6 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
auto root_graph = ge_root_model->GetRootGraph();
GE_CHECK_NOTNULL(root_graph);
string root_model_name = root_graph->GetName();
auto name_to_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr ge_model = name_to_model[root_model_name];
Status ret = SUCCESS;
do {

@ -17,9 +17,15 @@
#include "graph/load/model_manager/task_info/label_goto_ex_task_info.h"
#include "graph/load/model_manager/davinci_model.h"
#include "graph/debug/ge_attr_define.h"
namespace ge {
constexpr uint8_t kGotoBranchMax = 1;
LabelGotoExTaskInfo::~LabelGotoExTaskInfo() {
args_ = nullptr;
GE_FREE_RT_LOG(index_value_);
}
Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
GELOGI("LabelGotoExTaskInfo Init Start.");
GE_CHECK_NOTNULL(davinci_model);
@ -28,7 +34,7 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
return FAILED;
}
// Get LabelGoto task def
// Get LabelGotoEx task def
const domi::LabelGotoExDef &label_goto = task_def.label_goto_ex();
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_goto.op_index());
if (op_desc == nullptr) {
@ -43,20 +49,38 @@ Status LabelGotoExTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da
return INTERNAL_ERROR;
}
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
if (label_index >= label_list.size()) {
GELOGE(PARAM_INVALID, "LabelGotoExTaskInfo: Invalid label id:%u, label size:%zu", label_index, label_list.size());
return INTERNAL_ERROR;
rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
GELOGI("memory_type: %u", memory_type);
GE_CHK_STATUS_RET_NOLOG(davinci_model->GetLabelGotoAddr(label_index, memory_type, args_, args_size_));
rtError_t rt_ret = rtMalloc(&index_value_, sizeof(uint64_t), memory_type);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMalloc failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
label_ = label_list[label_index];
GELOGI("LabelGotoExTaskInfo Init Success, label id:%u, label:%p.", label_index, label_);
uint64_t branch_index = 0;
rt_ret = rtMemcpy(index_value_, sizeof(uint64_t), &branch_index, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rtMemcpy failed, error: %#x", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGI("LabelGotoExTaskInfo Init Success, label id:%u", label_index);
return SUCCESS;
}
Status LabelGotoExTaskInfo::Distribute() {
GELOGI("LabelGotoExTaskInfo Distribute Start.");
rtError_t rt_ret = rtLabelGotoEx(label_, stream_);
GE_CHECK_NOTNULL(args_);
GE_CHECK_NOTNULL(index_value_);
if (args_size_ == 0) {
GELOGE(PARAM_INVALID, "branch max: %u, args size: %u invalid.", kGotoBranchMax, args_size_);
return PARAM_INVALID;
}
rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, kGotoBranchMax, args_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);

@ -14,24 +14,26 @@
* limitations under the License.
*/
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#include "graph/load/model_manager/task_info/task_info.h"
namespace ge {
class LabelGotoExTaskInfo : public TaskInfo {
public:
LabelGotoExTaskInfo() : label_(nullptr) {}
LabelGotoExTaskInfo() = default;
~LabelGotoExTaskInfo() override { label_ = nullptr; }
~LabelGotoExTaskInfo() override;
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
Status Distribute() override;
private:
void *label_;
void *index_value_{nullptr}; // switch index input.
void *args_{nullptr}; // label info memory.
uint32_t args_size_{0}; // label info length.
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_GOTO_EX_TASK_INFO_H_

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#include "graph/load/model_manager/task_info/task_info.h"
@ -34,4 +34,4 @@ class LabelSetTaskInfo : public TaskInfo {
void *label_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SET_TASK_INFO_H_

@ -16,20 +16,13 @@
#include "graph/load/model_manager/task_info/label_switch_by_index_task_info.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/load/model_manager/davinci_model.h"
namespace ge {
constexpr uint8_t kLabelSwitchIndexNum = 1;
LabelSwitchByIndexTaskInfo::~LabelSwitchByIndexTaskInfo() {
if (args_ != nullptr) {
rtError_t ret = rtFree(args_);
if (ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", ret);
}
}
args_ = nullptr;
GE_FREE_RT_LOG(args_);
index_value_ = nullptr;
}
@ -37,13 +30,12 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
GELOGI("LabelSwitchByIndexTaskInfo Init Start.");
GE_CHECK_NOTNULL(davinci_model);
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
Status ret = SetStream(task_def.stream_id(), davinci_model->GetStreamList());
if (ret != SUCCESS) {
return FAILED;
}
// Get LabelSwitch task def
// Get LabelSwitchByIndex task def
const domi::LabelSwitchByIndexDef &label_switch = task_def.label_switch_by_index();
OpDescPtr op_desc = davinci_model->GetOpByIndex(label_switch.op_index());
if (op_desc == nullptr) {
@ -68,7 +60,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
davinci_model->DisableZeroCopy(index_value_);
std::vector<uint32_t> label_idx_list;
vector<uint32_t> label_idx_list;
if (!AttrUtils::GetListInt(op_desc, ATTR_NAME_LABEL_SWITCH_LIST, label_idx_list)) {
GELOGE(INTERNAL_ERROR, "LabelSwitchByIndexTaskInfo: %s Get attr %s failed.", op_desc->GetName().c_str(),
ATTR_NAME_LABEL_SWITCH_LIST.c_str());
@ -81,7 +73,8 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
return INTERNAL_ERROR;
}
label_list_.resize(branch_max_, nullptr);
vector<rtLabel_t> label_used(branch_max_, nullptr);
const vector<rtLabel_t> &label_list = davinci_model->GetLabelList();
for (size_t idx = 0; idx < label_idx_list.size(); ++idx) {
uint32_t label_id = label_idx_list[idx];
if (label_id >= label_list.size()) {
@ -90,8 +83,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(label_list[label_id]);
label_list_[idx] = label_list[label_id];
label_used[idx] = label_list[label_id];
}
rtMemType_t memory_type = op_desc->HasAttr(ATTR_NAME_MEMORY_TYPE_RANGE) ? RT_MEMORY_TS_4G : RT_MEMORY_HBM;
@ -103,7 +95,7 @@ Status LabelSwitchByIndexTaskInfo::Init(const domi::TaskDef &task_def, DavinciMo
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtLabelListCpy(label_list_.data(), label_list_.size(), args_, args_size_);
rt_ret = rtLabelListCpy(label_used.data(), label_used.size(), args_, args_size_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
@ -125,7 +117,7 @@ Status LabelSwitchByIndexTaskInfo::Distribute() {
rtError_t rt_ret = rtLabelSwitchByIndex(index_value_, branch_max_, args_, stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api failed, ret: 0x%X", rt_ret);
return RT_FAILED;
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGI("LabelSwitchByIndexTaskInfo Distribute Success.");

@ -14,16 +14,15 @@
* limitations under the License.
*/
#ifndef GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#define GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#ifndef GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#define GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#include "graph/load/model_manager/task_info/task_info.h"
namespace ge {
class LabelSwitchByIndexTaskInfo : public TaskInfo {
public:
LabelSwitchByIndexTaskInfo()
: index_value_(nullptr), branch_max_(0), args_(nullptr), args_size_(0), fixed_addr_offset_(0) {}
LabelSwitchByIndexTaskInfo() = default;
~LabelSwitchByIndexTaskInfo() override;
@ -34,12 +33,11 @@ class LabelSwitchByIndexTaskInfo : public TaskInfo {
Status CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) override;
private:
void *index_value_; // switch index input.
uint32_t branch_max_; // max branch count.
void *args_; // label info memory.
uint32_t args_size_; // label info length.
std::vector<rtLabel_t> label_list_;
int64_t fixed_addr_offset_;
void *index_value_{nullptr}; // switch index input.
uint32_t branch_max_{0}; // max branch count.
void *args_{nullptr}; // label info memory.
uint32_t args_size_{0}; // label info length.
int64_t fixed_addr_offset_{0};
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_
#endif // GE_GRAPH_LOAD_MODEL_MANAGER_TASK_INFO_LABEL_SWITCH_BY_INDEX_TASK_INFO_H_

@ -40,7 +40,7 @@ static bool BlockComparator(const Block *left, const Block *right) {
}
bool CanMerge(Block *block) {
if (block == nullptr || block->allocated || !block->IsSplit()) {
if ((block == nullptr) || block->allocated || !block->IsSplit()) {
return false;
}
return true;
@ -52,7 +52,7 @@ size_t GetBinIndex(size_t size) {
if (size <= range) {
break;
}
++index;
index++;
}
if (index > kNumBins - 1) {
index = kNumBins - 1;
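
Aside: GetBinIndex maps a request size onto doubling size-class ranges and clamps to the last bin; a runnable version under assumed constants (kNumBins and kBinSizeUnit here are placeholders):

#include <cstddef>
#include <iostream>

// Bins cover doubling ranges and the last bin absorbs everything larger.
constexpr std::size_t kNumBins = 8;
constexpr std::size_t kBinSizeUnit = 512;

std::size_t GetBinIndex(std::size_t size) {
  std::size_t index = 0;
  for (std::size_t range = kBinSizeUnit; index < kNumBins; range *= 2) {
    if (size <= range) break;
    index++;
  }
  if (index > kNumBins - 1) index = kNumBins - 1;  // clamp oversized requests
  return index;
}

int main() {
  std::cout << GetBinIndex(512) << " " << GetBinIndex(513) << " "
            << GetBinIndex(1u << 30) << "\n";  // 0 1 7
}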
@ -87,15 +87,15 @@ bool ShouldSplit(const Block *block, size_t size) {
void IncreaseCount(std::map<size_t, size_t> &count, size_t size) {
auto it = count.find(size);
if (it != count.end()) {
it->second++;
} else {
if (it == count.end()) {
count.emplace(size, 1);
} else {
it->second++;
}
}
CachingAllocator::CachingAllocator(rtMemType_t memory_type) : memory_type_(memory_type), memory_allocator_(nullptr) {
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
free_block_bins_[i] = nullptr;
}
}
@ -105,7 +105,7 @@ Status CachingAllocator::Initialize(uint32_t device_id) {
// when redo Initialize free old memory
FreeBlocks();
std::lock_guard<std::recursive_mutex> lock(mutex_);
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
if (free_block_bins_[i] != nullptr) {
continue;
}
@ -132,18 +132,18 @@ void CachingAllocator::Finalize(uint32_t device_id) {
uint8_t *CachingAllocator::Malloc(size_t size, uint8_t *org_ptr, uint32_t device_id) {
GELOGI("Start malloc pool memory, size = %zu, device id = %u", size, device_id);
uint8_t *ptr = nullptr;
size = GetBlockSize(size);
uint8_t *ptr = nullptr;
Block *block = FindFreeBlock(size, org_ptr, device_id);
if (block != nullptr) {
ptr = block->ptr;
} else {
if (block == nullptr) {
if (ge::SUCCESS == TryExtendCache(size, device_id)) {
block = FindFreeBlock(size, org_ptr, device_id);
if (block != nullptr) {
ptr = block->ptr;
}
}
} else {
ptr = block->ptr;
}
if (ptr == nullptr) {
GELOGE(FAILED, "Malloc failed device id = %u, size= %zu", device_id, size);
@ -171,7 +171,7 @@ Status CachingAllocator::Free(uint8_t *ptr, uint32_t device_id) {
}
void CachingAllocator::FreeBlock(Block *block) {
if (block == nullptr || !block->allocated) {
if ((block == nullptr) || !block->allocated) {
return;
}
GELOGI("Free block size = %zu", block->size);
@ -187,7 +187,7 @@ void CachingAllocator::FreeBlock(Block *block) {
}
void CachingAllocator::MergeBlocks(Block *dst, Block *src, BlockBin &bin) {
if (!CanMerge(dst) || !CanMerge(src)) {
if (!CanMerge(src) || !CanMerge(dst)) {
return;
}
@ -316,7 +316,7 @@ size_t CachingAllocator::FreeCachedBlocks() {
GELOGI("Free cached blocks");
std::lock_guard<std::recursive_mutex> lock(mutex_);
size_t free_cached_memory_size = 0;
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
auto pool = free_block_bins_[i];
if (pool == nullptr) {
continue;
@ -324,7 +324,8 @@ size_t CachingAllocator::FreeCachedBlocks() {
for (auto it = pool->begin(); it != pool->end();) {
Block *block = *it;
// free block memory that has not been split
if ((block != nullptr) && (block->ptr != nullptr) && (block->prev == nullptr) && (block->next == nullptr) &&
if ((block != nullptr) && (block->ptr != nullptr) &&
(block->prev == nullptr) && (block->next == nullptr) &&
(memory_allocator_->FreeMemory(block->ptr) == ge::SUCCESS)) {
auto itcount = malloced_memory_.find(block->size);
free_cached_memory_size += block->size;
@ -345,7 +346,7 @@ size_t CachingAllocator::FreeCachedBlocks() {
}
void CachingAllocator::FreeBlocks() {
GELOGI("Free blocks");
GELOGI("Free blocks.");
std::lock_guard<std::recursive_mutex> lock(mutex_);
// free allocated blocks and put to cache
for (auto &it : allocated_blocks_) {
@ -356,9 +357,9 @@ void CachingAllocator::FreeBlocks() {
}
void CachingAllocator::FreeBlockBins() {
GELOGI("Free block bins");
GELOGI("Free block bins.");
std::lock_guard<std::recursive_mutex> lock(mutex_);
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
if (free_block_bins_[i] != nullptr) {
delete free_block_bins_[i];
free_block_bins_[i] = nullptr;
@ -367,9 +368,9 @@ void CachingAllocator::FreeBlockBins() {
}
void PrintCount(std::map<size_t, size_t> &count, const std::string &name, size_t total_size, size_t total_count) {
GELOGI("%6s total[size:%10zu count:%10zu]", name.c_str(), total_size, total_count);
GELOGI("%6s total[size:%10zu count:%10zu].", name.c_str(), total_size, total_count);
for (auto &it : count) {
GELOGI(" |- block[size:%10zu count:%10zu]", it.first, it.second);
GELOGI(" |- block[size:%10zu count:%10zu].", it.first, it.second);
}
}
@ -383,20 +384,20 @@ void CachingAllocator::PrintStatics() {
size_t total_free_count = 0;
size_t total_malloc_size = 0;
size_t total_malloc_count = 0;
std::map<size_t, size_t> using_block;
std::map<size_t, size_t> free_block;
std::map<size_t, size_t> malloc_block;
std::map<size_t, size_t> using_block_stat;
std::map<size_t, size_t> free_block_stat;
std::map<size_t, size_t> malloc_block_stat;
do {
std::lock_guard<std::recursive_mutex> lock(mutex_);
for (uint32_t i = 0; i < kNumBins; ++i) {
for (uint32_t i = 0; i < kNumBins; i++) {
auto pool = free_block_bins_[i];
if (pool == nullptr) {
continue;
}
for (auto it = pool->begin(); it != pool->end(); ++it) {
for (auto it = pool->begin(); it != pool->end(); it++) {
if ((*it) != nullptr) {
total_free_size += (*it)->size;
IncreaseCount(free_block, (*it)->size);
IncreaseCount(free_block_stat, (*it)->size);
total_free_count++;
}
}
@ -405,7 +406,7 @@ void CachingAllocator::PrintStatics() {
for (auto &it : allocated_blocks_) {
if (it.second != nullptr) {
total_using_size += it.second->size;
IncreaseCount(using_block, it.second->size);
IncreaseCount(using_block_stat, it.second->size);
total_using_count++;
}
}
@ -413,12 +414,12 @@ void CachingAllocator::PrintStatics() {
for (auto &it : malloced_memory_) {
total_malloc_size += it.first * it.second;
total_malloc_count += it.second;
malloc_block[it.first] = it.second;
malloc_block_stat[it.first] = it.second;
}
} while (0);
PrintCount(malloc_block, "Malloc", total_malloc_size, total_malloc_count);
PrintCount(using_block, "Using", total_using_size, total_using_count);
PrintCount(free_block, "Free", total_free_size, total_free_count);
PrintCount(malloc_block_stat, "Malloc", total_malloc_size, total_malloc_count);
PrintCount(using_block_stat, "Using", total_using_size, total_using_count);
PrintCount(free_block_stat, "Free", total_free_size, total_free_count);
}
} // namespace ge

@ -359,7 +359,10 @@ Status GraphManager::AddGraph(const GraphId &graph_id, const Graph &graph,
std::shared_ptr<Graph> graph_ptr = MakeShared<ge::Graph>(graph);
GE_IF_BOOL_EXEC(graph_ptr == nullptr, GELOGE(FAILED, "GraphPtr make shared failed");
return FAILED);
// update option about tuning graph
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
ParseOption(options, TUNING_PATH, options_.tuning_path);
graph_node->SetGraph(graph_ptr);
graph_node->SetOptions(options);
AddGraphNode(graph_id, graph_node);
@ -433,6 +436,10 @@ Status GraphManager::AddGraphWithCopy(const GraphId &graph_id, const Graph &grap
GELOGE(FAILED, "GraphPtr make shared failed");
return FAILED;
}
// update option about tuning graph
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
ParseOption(options, TUNING_PATH, options_.tuning_path);
graph_node->SetGraph(graph_ptr);
graph_node->SetOptions(options);
@ -1466,6 +1473,10 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
GE_IF_BOOL_EXEC(ret != SUCCESS,
GELOGE(GE_GRAPH_OPTIONS_INVALID, "Key:ge.compressFlag value is invalid, must be 0 or 1.");
return GE_GRAPH_OPTIONS_INVALID);
// Set Build model and step
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
ParseOption(options, TUNING_PATH, options_.tuning_path);
// ge.graphType.
options_.run_graph_flag = true;
@ -1514,10 +1525,6 @@ Status GraphManager::ParseOptions(const std::map<std::string, std::string> &opti
GELOGD("Dynamic dims params: input shape is %s, dynamic dims is %s, dynamic node type is %d",
options_.input_shape.c_str(), options_.dynamic_dims.c_str(), options_.dynamic_node_type);
// Set Build model and step
ParseOption(options, BUILD_MODE, options_.build_mode);
ParseOption(options, BUILD_STEP, options_.build_step);
return SUCCESS;
}
@ -1549,6 +1556,7 @@ void GraphManager::ParseOption(const std::map<std::string, std::string> &options
std::string &option) {
auto iter = options.find(key);
if (iter != options.end()) {
GELOGD("Set option %s from value %s to value%s", key.c_str(), option.c_str(), iter->second.c_str());
option = iter->second;
}
}
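
Aside: ParseOption deliberately leaves the target untouched when the key is absent, so options set by an earlier call survive later maps that omit them. In isolation (option keys below are illustrative):

#include <iostream>
#include <map>
#include <string>

// Overwrite only when the key is present; an absent key keeps the value
// configured earlier.
void ParseOption(const std::map<std::string, std::string> &options,
                 const std::string &key, std::string &option) {
  auto iter = options.find(key);
  if (iter != options.end()) {
    option = iter->second;
  }
}

int main() {
  std::map<std::string, std::string> options = {{"ge.buildMode", "tuning"}};
  std::string build_mode = "normal";
  std::string build_step;
  ParseOption(options, "ge.buildMode", build_mode);  // overwritten: "tuning"
  ParseOption(options, "ge.buildStep", build_step);  // key absent: stays ""
  std::cout << build_mode << " [" << build_step << "]\n";
}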
@ -3132,6 +3140,21 @@ Status GraphManager::ConvertGraphToFile(ComputeGraphPtr &compute_graph, GraphPar
non_tuning_subgraphs.push_back(sub_graph_tmp);
}
}
// for function graphs to tune
for (auto &function_graph : compute_graph->GetAllSubgraphs()) {
auto subgraph_list = sub_graph_map[function_graph];
for (const auto &sub_graph_info_ptr : subgraph_list) {
GE_CHECK_NOTNULL(sub_graph_info_ptr);
ComputeGraphPtr sub_graph_tmp = sub_graph_info_ptr->GetSubGraph();
// need to tuning
if (sub_graph_info_ptr->GetEngineName() == kVectorEngine ||
sub_graph_info_ptr->GetEngineName() == kAIcoreEngine) {
tuning_subgraphs.push_back(sub_graph_tmp);
} else {
non_tuning_subgraphs.push_back(sub_graph_tmp);
}
}
}
return TuningUtils::ConvertGraphToFile(tuning_subgraphs, non_tuning_subgraphs, exe_flag, path);
}
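
Aside: the added loop applies the same engine-based split to function subgraphs that the preceding loop applies to root-graph subgraphs: AI Core and vector engine subgraphs go to the tuning list, everything else does not. The predicate on its own (struct and ids are illustrative):

#include <iostream>
#include <string>
#include <vector>

// Subgraphs on the AI Core or vector engine are tuned; the rest are not.
struct SubGraphInfo {
  std::string engine_name;
  int id;
};

void SplitForTuning(const std::vector<SubGraphInfo> &subgraphs,
                    std::vector<int> &tuning, std::vector<int> &non_tuning) {
  for (const auto &sg : subgraphs) {
    if (sg.engine_name == "AIcoreEngine" || sg.engine_name == "VectorEngine") {
      tuning.push_back(sg.id);
    } else {
      non_tuning.push_back(sg.id);
    }
  }
}

int main() {
  std::vector<SubGraphInfo> subgraphs = {{"AIcoreEngine", 0}, {"DNN_VM_GE_LOCAL", 1}};
  std::vector<int> tuning, non_tuning;
  SplitForTuning(subgraphs, tuning, non_tuning);
  std::cout << tuning.size() << " tuning, " << non_tuning.size() << " non-tuning\n";
}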

Some files were not shown because too many files have changed in this diff
