modify geloge and add err report

4 years ago · d85228d6fb
parent bab2821062
commit d85228d6fb
5 changed files with 206 additions and 168 deletions
--- a/ge/hybrid/executor/rt_callback_manager.cc
+++ b/ge/hybrid/executor/rt_callback_manager.cc
@ -27,7 +27,8 @@ Status CallbackManager::RegisterCallback(rtStream_t stream, rtCallback_t callbac
  GE_CHK_RT_RET(rtEventCreate(&event));
  auto rt_ret = rtEventRecord(event, stream);
  if (rt_ret != RT_ERROR_NONE) {
-    GELOGE(RT_FAILED, "Failed to invoke rtEventRecord, error code = %d", rt_ret);
+    GELOGE(RT_FAILED, "[Invoke][rtEventRecord] failed, error code = %d", rt_ret);
+    REPORT_CALL_ERROR("E19999", "Invoke rtEventRecord failed when %s, error code = %d", __FUNCTION__, rt_ret);
    (void) rtEventDestroy(event);
    return RT_FAILED;
  }
@ -50,7 +51,8 @@ Status CallbackManager::Init() {
    return CallbackProcess(context);
  }, ctx);
  if (!ret_future_.valid()) {
-    GELOGE(INTERNAL_ERROR, "Failed to init callback manager.");
+    GELOGE(INTERNAL_ERROR, "[Check][ShareState]Failed to init callback manager.");
+    REPORT_INNER_ERROR("E19999", "Failed to init callback manager.");
    return INTERNAL_ERROR;
  }

@ -73,7 +75,9 @@ Status CallbackManager::CallbackProcess(rtContext_t context) {

    auto rt_err = rtEventSynchronize(event);
    if (rt_err != RT_ERROR_NONE) {
-      GELOGE(RT_FAILED, "rtEventSynchronize failed. ret = %d", rt_err);
+      GELOGE(RT_FAILED, "[Invoke][rtEventSynchronize] failed. ret = %d", rt_err);
+      REPORT_CALL_ERROR("E19999", 
+          "Invoke rtEventSynchronize failed when CallbackManager %s, ret = %d.", __FUNCTION__, rt_err);
      GE_CHK_RT(rtEventDestroy(event));
      return RT_FAILED;
    }
--- a/ge/hybrid/executor/subgraph_context.cc
+++ b/ge/hybrid/executor/subgraph_context.cc
@ -50,9 +50,11 @@ NodeStatePtr SubgraphContext::GetOrCreateNodeState(const NodeItem *node_item) {
 Status SubgraphContext::SetInput(int index, const TensorValue &tensor) {
  if (static_cast<size_t>(index) >= all_inputs_.size()) {
    GELOGE(INTERNAL_ERROR,
-           "output index output range. all input num = %zu, input index = %d",
-           all_inputs_.size(),
-           index);
+        "[Check][Param:index]output index output range. all input num = %zu, input index = %d",
+        all_inputs_.size(), index);
+    REPORT_INNER_ERROR("E19999", 
+        "input param index out range when SubgraphContext %s, all input num = %zu, input index = %d.",
+        __FUNCTION__, all_inputs_.size(), index);
    return INTERNAL_ERROR;
  }
  all_inputs_[index] = tensor;
@ -68,10 +70,11 @@ Status SubgraphContext::SetOutput(const NodeItem &node_item, int output_index, c
  auto index = node_item.output_start + output_index;
  if ((output_index >= node_item.num_outputs) || (static_cast<size_t>(index) >= all_outputs_.size())) {
    GELOGE(INTERNAL_ERROR,
-           "output index output range. all output num = %zu, node_item = %s, output index = %d",
-           all_outputs_.size(),
-           node_item.DebugString().c_str(),
-           output_index);
+        "[Check][Param:output_index]output index output range. all output num = %zu, node_item = %s,"
+        "output index = %d.", all_outputs_.size(), node_item.DebugString().c_str(), output_index);
+    REPORT_INNER_ERROR("E19999", "output index output range when SubgraphContext %s. "
+        "all output num = %zu, node_item = %s, output index = %d.",
+        __FUNCTION__, all_outputs_.size(), node_item.DebugString().c_str(), output_index);
    return INTERNAL_ERROR;
  }

@ -126,7 +129,9 @@ Status SubgraphContext::Await(const NodePtr &node) {

 void SubgraphContext::OnError(Status error) {
  if (error != END_OF_SEQUENCE) {
-    GELOGE(error, "[%s] Error occurred while executing graph.", graph_item_->GetName().c_str());
+    GELOGE(error, "[Check][Param:error][%s] Error occurred while executing graph.", graph_item_->GetName().c_str());
+    REPORT_INNER_ERROR("E19999", "[%s] Error occurred while executing graph when SubgraphContext %s.",
+        graph_item_->GetName().c_str(), __FUNCTION__);
  }
  node_done_manager_.Destroy();
 }
--- a/ge/hybrid/executor/subgraph_executor.cc
+++ b/ge/hybrid/executor/subgraph_executor.cc
@ -44,7 +44,8 @@ Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                              const std::vector<ConstGeTensorDescPtr> &input_desc) {
  subgraph_context_.reset(new(std::nothrow)SubgraphContext(graph_item_, context_));
  GE_CHECK_NOTNULL(subgraph_context_);
-  GE_CHK_STATUS_RET(subgraph_context_->Init(), "[%s] Failed to init subgraph context.", graph_item_->GetName().c_str());
+  GE_CHK_STATUS_RET(subgraph_context_->Init(), 
+      "[Init][SubgraphContext][%s] Failed to init subgraph context.", graph_item_->GetName().c_str());

  shape_inference_engine_.reset(new(std::nothrow) ShapeInferenceEngine(context_, subgraph_context_.get()));
  GE_CHECK_NOTNULL(shape_inference_engine_);
@ -55,7 +56,7 @@ Status SubgraphExecutor::Init(const std::vector<TensorValue> &inputs,
                      graph_item_->GetName().c_str());
  } else {
    GE_CHK_STATUS_RET(InitInputsForKnownShape(inputs),
-                      "[%s] Failed to init subgraph executor for known shape subgraph.",
+        "[Invoke][InitInputsForKnownShape][%s] Failed to init subgraph executor for known shape subgraph.",
        graph_item_->GetName().c_str());
  }

@ -67,8 +68,12 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
  // Number of inputs of parent node should be greater or equal than that of subgraph
  auto input_nodes = graph_item_->GetInputNodes();
  if (inputs.size() < input_nodes.size()) {
-    GELOGE(INTERNAL_ERROR, "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
+    GELOGE(INTERNAL_ERROR, 
+        "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs.",
        graph_item_->GetName().c_str(), inputs.size(), input_nodes.size());
+    REPORT_INNER_ERROR("E19999",
+        "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs [%zu] inputs when SubgraphExecutor %s.",
+        graph_item_->GetName().c_str(), inputs.size(), input_nodes.size(), __FUNCTION__);
    return INTERNAL_ERROR;
  }

@ -87,9 +92,7 @@ Status SubgraphExecutor::InitInputsForUnknownShape(const std::vector<TensorValue
           input_tensor.DebugString().c_str());

    GE_CHK_STATUS_RET(subgraph_context_->SetInput(*input_node, kDataInputIndex, input_tensor),
-                      "[%s] Failed to set input tensor[%zu]",
-                      graph_item_->GetName().c_str(),
-                      i);
+        "[Invoke][SetInput] failed for grap_item[%s] input tensor[%zu]", graph_item_->GetName().c_str(), i);

    if (force_infer_shape_ || input_node->is_dynamic) {
      GELOGD("[%s] Start to update input[%zu] for subgraph data node.", graph_item_->GetName().c_str(), i);
@ -112,11 +115,11 @@ Status SubgraphExecutor::InitInputsForKnownShape(const std::vector<TensorValue>
    auto &parent_input_index = input_index_mapping[i];
    if (static_cast<size_t>(parent_input_index) >= inputs.size()) {
      GELOGE(INTERNAL_ERROR,
-             "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
-             graph_item_->GetName().c_str(),
-             inputs.size(),
-             parent_input_index + 1);
-
+          "[Check][Size][%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs",
+          graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1);
+      REPORT_INNER_ERROR("E19999", 
+          "[%s] Number of inputs [%zu] is not sufficient for subgraph which needs at lease [%d] inputs when %s.",
+          graph_item_->GetName().c_str(), inputs.size(), parent_input_index + 1, __FUNCTION__);
      return INTERNAL_ERROR;
    }

@ -136,10 +139,10 @@ Status SubgraphExecutor::ExecuteAsync(const std::vector<TensorValue> &inputs,
                                      const std::vector<ConstGeTensorDescPtr> &input_desc,
                                      const std::vector<TensorValue> &outputs) {
  GELOGD("[%s] is dynamic = %s", graph_item_->GetName().c_str(), graph_item_->IsDynamic() ? "true" : "false");
-  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[%s] Failed to init executor.", graph_item_->GetName().c_str());
+  GE_CHK_STATUS_RET(Init(inputs, input_desc), "[Invoke][Init]failed for [%s].", graph_item_->GetName().c_str());
  if (!outputs.empty()) {
    GE_CHK_STATUS_RET(EnableOutputZeroCopy(outputs),
-                      "Failed to enable output zero copy by user provided outputs.");
+        "[Invoke][EnableOutputZeroCopy] Failed by user provided outputs.");
  }
  if (!graph_item_->IsDynamic()) {
    return ExecuteAsyncForKnownShape(inputs);
@ -194,11 +197,10 @@ Status SubgraphExecutor::ExecuteAsync(TaskContext &task_context) {
  }

  GE_CHK_STATUS_RET(ExecuteAsync(inputs, input_desc),
-                    "[%s] Failed to execute subgraph.",
-                    graph_item_->GetName().c_str());
+      "[Invoke][ExecuteAsync] failed for [%s].", graph_item_->GetName().c_str());

  GE_CHK_STATUS_RET(SetOutputsToParentNode(task_context),
-                    "[%s] Failed to set output shapes to parent node.",
+      "[Invoke][SetOutputsToParentNode][%s] Failed to set output shapes to parent node.",
      graph_item_->GetName().c_str());
  return SUCCESS;
 }
@ -239,7 +241,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
        if (node_item.kernel_task == nullptr) {
          GELOGW("[%s] Node of static shape got no task.", node_item.NodeName().c_str());
          GE_CHK_STATUS_RET(TaskCompileEngine::Compile(*p_node_state, context_),
-                            "[%s] Failed to create task.", p_node_state->GetName().c_str());
+              "[Invoke][Compile] failed for [%s].", p_node_state->GetName().c_str());
        } else {
          node_state->SetKernelTask(node_item.kernel_task);
        }
@ -248,7 +250,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {
        GE_CHECK_NOTNULL(unique_task_context);
        const auto &task = node_state->GetKernelTask();
        if (task == nullptr) {
-          GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state->GetName().c_str());
+          GELOGE(INTERNAL_ERROR, "[Get][KernelTask] failed for[%s], NodeTask is null.", node_state->GetName().c_str());
+          REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s when %s, nodetask is null.",
+              node_state->GetName().c_str(), __FUNCTION__);
          return INTERNAL_ERROR;
        }
        auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@ -261,8 +265,10 @@ Status SubgraphExecutor::PrepareNodes(int group) {
        GELOGD("Got end of sequence");
        return SUCCESS;
      }
-      GELOGE(INTERNAL_ERROR, "[%s] Error occurs while launching tasks. quit from preparing nodes.",
+      GELOGE(INTERNAL_ERROR, "[Check][State][%s] Error occurs while launching tasks. quit from preparing nodes.",
          graph_item_->GetName().c_str());
+      REPORT_INNER_ERROR("E19999", "[%s] Error occurs while launching tasks. quit from preparing nodes when %s.",
+          graph_item_->GetName().c_str(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@ -275,9 +281,9 @@ Status SubgraphExecutor::PrepareNodes(int group) {

 Status SubgraphExecutor::InferShape(ShapeInferenceEngine *shape_inference_engine, NodeState &node_state) const {
  HYBRID_CHK_STATUS_RET(shape_inference_engine->InferShape(node_state),
-                        "[%s] Failed to InferShape.", node_state.GetName().c_str());
+      "[Invoke][InferShape] failed for [%s].", node_state.GetName().c_str());
  HYBRID_CHK_STATUS_RET(shape_inference_engine->PropagateOutputShapes(node_state),
-                        "[%s] Failed to PropagateOutputShapes.", node_state.GetName().c_str());
+      "[Invoke][PropagateOutputShapes] failed for [%s].", node_state.GetName().c_str());
  return SUCCESS;
 }

@ -285,7 +291,7 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
  auto &node_item = *node_state.GetNodeItem();
  if (node_item.kernel_task == nullptr) {
    GE_CHK_STATUS_RET(TaskCompileEngine::Compile(node_state, ctx),
-                      "Failed to create task for node[%s]", node_state.GetName().c_str());
+        "[Invoke][Compile] Failed for node[%s]", node_state.GetName().c_str());
  } else {
    node_state.SetKernelTask(node_item.kernel_task);
  }
@ -293,7 +299,9 @@ Status SubgraphExecutor::PrepareForExecution(GraphExecutionContext *ctx, NodeSta
  GE_CHECK_NOTNULL(unique_task_context);
  const auto &task = node_state.GetKernelTask();
  if (task == nullptr) {
-    GELOGE(INTERNAL_ERROR, "[%s] NodeTask is null.", node_state.GetName().c_str());
+    GELOGE(INTERNAL_ERROR, "[Invoke][GetKernelTask] failed for[%s], NodeTask is null.", node_state.GetName().c_str());
+    REPORT_CALL_ERROR("E19999", "invoke GetKernelTask failed for %s, NodeTask is null when %s.",
+        node_state.GetName().c_str(), __FUNCTION__);
    return INTERNAL_ERROR;
  }
  auto shared_task_context = std::shared_ptr<TaskContext>(unique_task_context.release());
@ -309,7 +317,8 @@ Status SubgraphExecutor::LaunchTasks() {
  while (true) {
    NodeState *node_state = nullptr;
    if (!ready_queue_.Pop(node_state)) {
-      GELOGE(INTERNAL_ERROR, "[%s] Failed to pop node.", graph_item_->GetName().c_str());
+      GELOGE(INTERNAL_ERROR, "[Invoke][Pop] failed for [%s].", graph_item_->GetName().c_str());
+      REPORT_CALL_ERROR("E19999", "invoke pop failed for %s when %s", graph_item_->GetName().c_str(), __FUNCTION__);
      return INTERNAL_ERROR;
    }

@ -334,8 +343,7 @@ Status SubgraphExecutor::LaunchTasks() {
    GE_CHECK_NOTNULL(shared_task_context);
    shared_task_context->SetForceInferShape(force_infer_shape_);
    HYBRID_CHK_STATUS_RET(ExecutionEngine::ExecuteAsync(*node_state, shared_task_context, *context_),
-                          "[%s] Execute node failed.",
-                          node_state->GetName().c_str());
+        "[Invoke][ExecuteAsync] failed for [%s].", node_state->GetName().c_str());
    GELOGD("[%s] Done executing node successfully.", node_state->GetName().c_str());
  }
 }
@ -361,8 +369,7 @@ Status SubgraphExecutor::ScheduleTasks(int group) {
  }

  GE_CHK_STATUS_RET(prepare_future.get(),
-                    "[%s] Error occurred in task preparation.",
-                    graph_item_->GetName().c_str());
+      "[Invoke][get] [%s] Error occurred in task preparation.", graph_item_->GetName().c_str());

  GELOGD("[%s] Done launching all tasks successfully.", graph_item_->GetName().c_str());
  return SUCCESS;
@ -373,17 +380,17 @@ Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs) {
 }

 Status SubgraphExecutor::GetOutputs(vector<TensorValue> &outputs, std::vector<ConstGeTensorDescPtr> &output_desc) {
-  GE_CHK_STATUS_RET(GetOutputs(outputs), "[%s] Failed to get output tensors.", graph_item_->GetName().c_str());
+  GE_CHK_STATUS_RET(GetOutputs(outputs), "[Invoke][GetOutputs] failed for [%s].", graph_item_->GetName().c_str());

  // copy output data from op to designated position
  GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc),
-                    "[%s] Failed to get output tensor desc.",
-                    graph_item_->GetName().c_str());
+      "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());
  if (outputs.size() != output_desc.size()) {
    GELOGE(INTERNAL_ERROR,
-           "Number of output tensors(%zu) mismatch number of output tensor desc(%zu).",
-           outputs.size(),
-           output_desc.size());
+        "[Check][Size]Number of output tensors(%zu) mismatch number of output tensor desc(%zu).",
+        outputs.size(), output_desc.size());
+    REPORT_INNER_ERROR("E19999", "Number of output tensors(%zu) mismatch number of output tensor desc(%zu) when %s.",
+        outputs.size(), output_desc.size(), __FUNCTION__);
    return INTERNAL_ERROR;
  }
  return SUCCESS;
@ -401,17 +408,15 @@ Status SubgraphExecutor::SetOutputsToParentNode(TaskContext &task_context) {
  std::vector<TensorValue> outputs;
  std::vector<ConstGeTensorDescPtr> output_desc_list;
  GE_CHK_STATUS_RET(subgraph_context_->GetOutputs(outputs),
-                    "[%s] Failed to get output tensors.",
-                    graph_item_->GetName().c_str());
+      "[Invoke][GetOutputs][%s] Failed to get output tensors.", graph_item_->GetName().c_str());
  GE_CHK_STATUS_RET(graph_item_->GetOutputDescList(output_desc_list),
-                    "[%s] Failed to get output tensor desc.",
-                    graph_item_->GetName().c_str());
+      "[Invoke][GetOutputDescList][%s] Failed to get output tensor desc.", graph_item_->GetName().c_str());

  if (outputs.size() != output_desc_list.size()) {
-    GELOGE(INTERNAL_ERROR, "[%s] num output tensors = %zu, num output tensor desc = %zu",
-           graph_item_->GetName().c_str(),
-           outputs.size(),
-           output_desc_list.size());
+    GELOGE(INTERNAL_ERROR, "[Check][Size][%s] num output tensors = %zu, num output tensor desc = %zu",
+        graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size());
+    REPORT_INNER_ERROR("E19999", "[%s] num output tensors = %zu, num output tensor desc = %zu when %s",
+        graph_item_->GetName().c_str(), outputs.size(), output_desc_list.size(), __FUNCTION__);
    return INTERNAL_ERROR;
  }

@ -460,9 +465,10 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
  const auto &output_edges = graph_item_->GetOutputEdges();
  // Op -> MetOutput, set the output tensor of Op that output to the NetOutput node
  if (outputs.size() != output_edges.size()) {
-    GELOGE(PARAM_INVALID, "Output number mismatches, expect = %zu, but given = %zu",
-           output_edges.size(),
-           outputs.size());
+    GELOGE(PARAM_INVALID, "[Check][Size]Output number mismatches, expect = %zu, but given = %zu",
+        output_edges.size(), outputs.size());
+    REPORT_INNER_ERROR("E19999", "Output number mismatches, expect = %zu, but given = %zu when %s",
+        output_edges.size(), outputs.size(), __FUNCTION__);
    return PARAM_INVALID;
  }

@ -478,9 +484,7 @@ Status SubgraphExecutor::EnableOutputZeroCopy(const vector<TensorValue> &outputs
           output_tensor.DebugString().c_str());

    GE_CHK_STATUS_RET(subgraph_context_->SetOutput(*output_node, output_idx, output_tensor),
-                      "[%s] Failed to set input tensor[%zu]",
-                      graph_item_->GetName().c_str(),
-                      i);
+        "[Invoke][SetOutput][%s] Failed to set input tensor[%zu]", graph_item_->GetName().c_str(), i);
  }

  GELOGD("Done enabling zero copy for outputs successfully.");
--- a/ge/hybrid/model/graph_item.cc
+++ b/ge/hybrid/model/graph_item.cc
@ -95,7 +95,8 @@ Status GraphItem::GroupNodes() {
    int group = node->group;
    if (group != last_group) {
      if (seen_groups.find(group) != seen_groups.end()) {
-        GELOGE(INTERNAL_ERROR, "Unordered node group found. node = %s, group = %d", node->NodeName().c_str(), group);
+        GELOGE(INTERNAL_ERROR, 
+            "[Order][NodeGroup]Unordered node group found. node = %s, group = %d", node->NodeName().c_str(), group);
        return INTERNAL_ERROR;
      } else {
        last_group = group;
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc