diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc
index 51233636..85b2e9ac 100755
--- a/ge/hybrid/executor/hybrid_model_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_executor.cc
@@ -155,6 +155,20 @@ Status HybridModelExecutor::ResetExecutionContext(GraphExecutionContext &context
   string ctx_id = std::to_string(context.context_id);
   RuntimeInferenceContext::DestroyContext(ctx_id);
   GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+  // Preload the constant tensors that the model builder copied to host memory into the
+  // newly created inference context, keyed by producer node id and output index.
+  RuntimeInferenceContext *ctx = nullptr;
+  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+  for (const auto &host_tensor : context.model->GetHostTensors()) {
+    const auto node_id = host_tensor.first;
+    for (const auto &output_idx_and_tensor : host_tensor.second) {
+      const auto output_idx = output_idx_and_tensor.first;
+      GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+      // The model's tensor is only read here; the context receives its own clone.
+      GE_CHK_GRAPH_STATUS_RET(ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()),
+                              "Failed to preload host tensor, node_id = %ld", node_id);
+    }
+  }
   return SUCCESS;
 }
 
diff --git a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
index 0633eeb5..d9aa051e 100644
--- a/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
+++ b/ge/hybrid/executor/hybrid_model_pipeline_executor.cc
@@ -38,6 +38,20 @@ Status StageExecutor::ResetExecutionContext(GraphExecutionContext &context) {
   string ctx_id = std::to_string(context.context_id);
   RuntimeInferenceContext::DestroyContext(ctx_id);
   GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::CreateContext(ctx_id), "Failed to Destroy RuntimeInferenceContext");
+  // Preload the constant tensors that the model builder copied to host memory into the
+  // newly created inference context, keyed by producer node id and output index.
+  RuntimeInferenceContext *ctx = nullptr;
+  GE_CHK_GRAPH_STATUS_RET(RuntimeInferenceContext::GetContext(ctx_id, &ctx), "Failed to get context");
+  for (const auto &host_tensor : context.model->GetHostTensors()) {
+    const auto node_id = host_tensor.first;
+    for (const auto &output_idx_and_tensor : host_tensor.second) {
+      const auto output_idx = output_idx_and_tensor.first;
+      GELOGD("Preload const host tensor, node_id = %ld, output id = %d", node_id, output_idx);
+      // The model's tensor is only read here; the context receives its own clone.
+      GE_CHK_GRAPH_STATUS_RET(ctx->SetTensor(node_id, output_idx, output_idx_and_tensor.second.Clone()),
+                              "Failed to preload host tensor, node_id = %ld", node_id);
+    }
+  }
   return SUCCESS;
 }
 
diff --git a/ge/hybrid/model/hybrid_model.cc b/ge/hybrid/model/hybrid_model.cc
index c7b2eadb..59cf708e 100644
--- a/ge/hybrid/model/hybrid_model.cc
+++ b/ge/hybrid/model/hybrid_model.cc
@@ -358,6 +358,11 @@ TensorValue *HybridModel::GetTensor(const NodePtr &node) const {
   return GetVariable(node->GetName());
 }
 
+// Returns the host copies of constant tensors recorded by the model builder, keyed by node id.
+const map<int64_t, std::vector<std::pair<int, Tensor>>> &HybridModel::GetHostTensors() const {
+  return host_tensors_;
+}
+
 void *HybridModel::GetGlobalStep() const {
   if (global_step_ == nullptr) {
     return nullptr;
diff --git a/ge/hybrid/model/hybrid_model.h b/ge/hybrid/model/hybrid_model.h
index ed1d092e..c8d30672 100644
--- a/ge/hybrid/model/hybrid_model.h
+++ b/ge/hybrid/model/hybrid_model.h
@@ -93,6 +93,8 @@ class HybridModel {
 
   TensorValue* GetTensor(const NodePtr &node) const;
 
+  const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const;
+
   const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const;
 
   const GraphItem *GetRootGraphItem() const;
@@ -148,6 +150,8 @@ class HybridModel {
   std::unique_ptr<GraphItem> root_graph_item_;
   std::map<std::string, std::unique_ptr<GraphItem>> subgraph_items_;
   std::map<NodePtr, std::unique_ptr<NodeItem>> node_items_;
+  // map<node_id, vector<pair<output_idx, host copy of the constant tensor>>>
+  std::map<int64_t, std::vector<std::pair<int, Tensor>>> host_tensors_;
 
   bool is_new_model_desc_ = false;  // support aipp
   bool is_single_op_ = false;
diff --git a/ge/hybrid/model/hybrid_model_builder.cc b/ge/hybrid/model/hybrid_model_builder.cc
index ff107fe6..ad1dae7a 100755
--- a/ge/hybrid/model/hybrid_model_builder.cc
+++ b/ge/hybrid/model/hybrid_model_builder.cc
@@ -151,6 +151,9 @@ Status HybridModelBuilder::Build() {
   GE_CHK_STATUS_RET(InitConstantOps(), "[Invoke][InitConstantOps] failed, model_name_:[%s]", GetGraphName());
   GE_CHK_STATUS_RET(InitVariableTensors(), "[Invoke][InitVariableTensors], model_name_:[%s]", GetGraphName());
   GE_CHK_STATUS_RET(LoadTasks(), "[Invoke][LoadTasks] failed, model_name_:[%s]", GetGraphName());
+  GE_CHK_STATUS_RET(OptimizeDependenciesForConstantInputs(),
+                    "[Invoke][OptimizeDependenciesForConstantInputs] failed, model_name_:[%s]",
+                    GetGraphName());
   GELOGI("[%s] Done building hybrid model successfully.", GetGraphName());
   return SUCCESS;
 }
@@ -353,6 +356,7 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s
     auto src_node_item = MutableNodeItem(src_node);
     src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx());
     dependent_for_shape_inference.emplace(src_node);
+    host_input_value_dependencies_[&node_item].emplace_back(peer_out_anchor->GetIdx(), src_node_item);
     GELOGD("[%s] Dependent added from output of [%s:%d]",
            node_item.NodeName().c_str(),
            src_node_item->NodeName().c_str(),
@@ -1536,7 +1540,7 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
            src_node->GetName().c_str(),
            src_op_type.c_str());
 
-    if (src_op_type != CONSTANTOP && src_op_type != VARIABLE) {
+    if (src_op_type != CONSTANTOP && src_op_type != CONSTANT && src_op_type != VARIABLE) {
       continue;
     }
 
@@ -1545,6 +1549,9 @@ Status HybridModelBuilder::IdentifyVariableOutputs(NodeItem &node_item) {
     GELOGD("Got parent output index = %u", parent_index);
     GE_CHECK_LE(parent_index, INT32_MAX);
     node_item.ref_outputs.emplace(static_cast<int>(parent_index), src_node);
+    if (src_op_type == CONSTANTOP || src_op_type == CONSTANT) {
+      known_subgraph_constant_output_refs_[&node_item].emplace(parent_index, src_node);
+    }
   }
 
   // Data nodes marked with REF_VAR_SRC_VAR_NAME
@@ -2176,5 +2183,107 @@ Status HybridModelBuilder::ParseDependentByParallelGroup() {
   }
   return SUCCESS;
 }
+
+// Removes shape-inference dependencies on inputs whose value is a build-time constant.
+//
+// For each (output index, source node) pair in host_input_value_dependencies_, the source counts
+// as constant when its node_type is CONSTANT/CONSTANTOP, or when the output is listed for it in
+// known_subgraph_constant_output_refs_. Such an output is copied to host once (Convert2HostTensor)
+// and erased from the source's to_const_output_id_list; dependents_for_shape_inference is then
+// rebuilt from the sources that still have at least one non-constant dependency.
+Status HybridModelBuilder::OptimizeDependenciesForConstantInputs() {
+  std::map<NodePtr, std::set<uint32_t>> converted;
+  for (auto &it : host_input_value_dependencies_) {
+    auto node_item = it.first;
+    // Number of still-needed (non-constant) dependencies per source node.
+    std::map<NodeItem *, int> ref_counts;
+    bool changed = false;
+    for (const auto &output_idx_and_node : it.second) {
+      const auto output_idx = output_idx_and_node.first;
+      const auto src_node_item = output_idx_and_node.second;
+      ++ref_counts[src_node_item];
+      NodePtr constant_node;
+      if (src_node_item->node_type == CONSTANT || src_node_item->node_type == CONSTANTOP) {
+        constant_node = src_node_item->node;
+        GELOGD("src node [%s] is a constant", src_node_item->NodeName().c_str());
+      } else {
+        const auto iter = known_subgraph_constant_output_refs_.find(src_node_item);
+        if (iter != known_subgraph_constant_output_refs_.end()) {
+          // Look the output up with find(): operator[] would default-insert a null entry for
+          // every non-constant output that is queried.
+          const auto ref_it = iter->second.find(static_cast<uint32_t>(output_idx));
+          if (ref_it != iter->second.end() && ref_it->second != nullptr) {
+            constant_node = ref_it->second;
+            GELOGD("Output[%u] of subgraph [%s] is a constant", output_idx, src_node_item->NodeName().c_str());
+          }
+        }
+      }
+
+      if (constant_node == nullptr) {
+        GELOGD("Output[%u] of [%s] is not a constant", output_idx, src_node_item->NodeName().c_str());
+        continue;
+      }
+
+      if (converted[constant_node].count(output_idx) == 0) {
+        GE_CHK_STATUS_RET(Convert2HostTensor(constant_node, src_node_item->node_id, output_idx),
+                          "[%s] Failed to convert constant to host tensor", constant_node->GetName().c_str());
+        converted[constant_node].emplace(output_idx);
+      }
+
+      src_node_item->to_const_output_id_list.erase(output_idx);
+      --ref_counts[src_node_item];
+      changed = true;
+    }
+
+    if (changed) {
+      std::vector<NodePtr> depends_to_keep;
+      for (auto &ref_count_it : ref_counts) {
+        if (ref_count_it.second == 0) {
+          GELOGD("[%s] no longer depends on [%s] for shape inference",
+                 node_item->NodeName().c_str(),
+                 ref_count_it.first->NodeName().c_str());
+        } else {
+          depends_to_keep.emplace_back(ref_count_it.first->node);
+        }
+      }
+      node_item->dependents_for_shape_inference.swap(depends_to_keep);
+    }
+  }
+
+  return SUCCESS;
+}
+
+// Copies the data of constant `node` to host memory and records it in
+// hybrid_model_.host_tensors_[node_id] together with `output_idx`.
+// The tensor descriptor is always taken from output 0 of `node`; when the reported tensor size
+// is not positive, a tensor without data is recorded.
+Status HybridModelBuilder::Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx) {
+  auto tensor_value = hybrid_model_.GetTensor(node);
+  GE_CHECK_NOTNULL(tensor_value);
+  auto op_desc = node->GetOpDesc();
+  GE_CHECK_NOTNULL(op_desc);
+  auto tensor_desc = op_desc->MutableOutputDesc(0);
+  GE_CHECK_NOTNULL(tensor_desc);
+  Tensor tensor(TensorAdapter::GeTensorDesc2TensorDesc(*tensor_desc));
+  int64_t tensor_size = -1;
+  GE_CHK_GRAPH_STATUS_RET(TensorUtils::GetTensorSizeInBytes(*tensor_desc, tensor_size),
+                          "[%s] Failed to get tensor size", node->GetName().c_str());
+  if (tensor_size > 0) {
+    auto copy_size = static_cast<size_t>(tensor_size);
+    GE_CHECK_GE(tensor_value->GetSize(), copy_size);
+    std::vector<uint8_t> buffer(copy_size);
+    GE_CHK_RT_RET(rtMemcpy(buffer.data(),
+                           copy_size,
+                           tensor_value->GetData(),
+                           copy_size,
+                           RT_MEMCPY_DEVICE_TO_HOST));
+    GE_CHK_GRAPH_STATUS_RET(tensor.SetData(std::move(buffer)),
+                            "[%s] Failed to set host tensor data", node->GetName().c_str());
+    GELOGD("[%s] Copy constant tensor to host successfully, size = %zu", node->GetName().c_str(), copy_size);
+  }
+
+  hybrid_model_.host_tensors_[node_id].emplace_back(output_idx, std::move(tensor));
+  return SUCCESS;
+}
 }  // namespace hybrid
 }  // namespace ge
diff --git a/ge/hybrid/model/hybrid_model_builder.h b/ge/hybrid/model/hybrid_model_builder.h
index 430637dc..3e467dc8 100644
--- a/ge/hybrid/model/hybrid_model_builder.h
+++ b/ge/hybrid/model/hybrid_model_builder.h
@@ -91,6 +91,8 @@ class HybridModelBuilder {
   Status GenerateBpProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
   Status GenerateEndProfilingTask(const OpDescPtr &op_desc, vector<domi::TaskDef> &task_def_list);
   Status GenerateArProfilingTask(const OpDescPtr &op_desc, int64_t log_id, vector<domi::TaskDef> &task_def_list);
+  Status OptimizeDependenciesForConstantInputs();
+  Status Convert2HostTensor(const NodePtr &node, int node_id, uint32_t output_idx);
 
   const char* GetGraphName() const {
     return hybrid_model_.model_name_.c_str();
@@ -110,6 +112,12 @@ class HybridModelBuilder {
 
   RuntimeParam &runtime_param_;
   VarManager *var_manager_ = nullptr;
+
+  // map<known_node_item, map<output_idx, constant_node>>
+  std::map<NodeItem *, std::map<uint32_t, NodePtr>> known_subgraph_constant_output_refs_;
+
+  // map<dst_node_item, vector<output_idx, src_node_item>>
+  std::map<NodeItem *, std::vector<std::pair<int, NodeItem *>>> host_input_value_dependencies_;
 };
 }  // namespace hybrid
 }  // namespace ge
diff --git a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
index 8d1c844a..2a696f24 100644
--- a/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
+++ b/tests/ut/ge/hybrid/ge_hybrid_unittest.cc
@@ -466,3 +466,77 @@ TEST_F(UtestGeHybrid, hybrid_model_executor_check_shape) {
   ret = HybridModelExecutor::CheckInputShapeByShapeRange(&graph_item, args1);
   ASSERT_EQ(ret, ge::INTERNAL_ERROR);
 }
+
+TEST_F(UtestGeHybrid, TestOptimizeDependenciesForConstInputs) {
+  ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>("test");
+  GeRootModelPtr root_model = MakeShared<GeRootModel>(compute_graph);
+  HybridModel model(root_model);
+  model.root_graph_ = compute_graph;
+  HybridModelBuilder builder(model);
+
+  GeShape shape({2, 16});
+  GeTensorDesc tensor_desc(shape);
+  std::unique_ptr<NodeItem> const_node_item;
+  {
+    OpDescPtr const_op_desc = CreateOpDesc("Constant", "Const");
+    const_op_desc->AddOutputDesc(tensor_desc);
+    auto const_node = compute_graph->AddNode(const_op_desc);
+    NodeItem::Create(const_node, const_node_item);
+  }
+
+  std::unique_ptr<NodeItem> non_const_node_item;
+  {
+    OpDescPtr add_op_desc = CreateOpDesc("Add", "Add");
+    add_op_desc->AddOutputDesc(tensor_desc);
+    auto add_node = compute_graph->AddNode(add_op_desc);
+    NodeItem::Create(add_node, non_const_node_item);
+  }
+
+  std::unique_ptr<NodeItem> known_node_item;
+  {
+    OpDescPtr known_op_desc = CreateOpDesc("known", "PartitionedCall");
+    known_op_desc->AddOutputDesc(tensor_desc);
+    known_op_desc->AddOutputDesc(tensor_desc);
+    auto known_node = compute_graph->AddNode(known_op_desc);
+    NodeItem::Create(known_node, known_node_item);
+  }
+
+  std::unique_ptr<NodeItem> dst_node_item;
+  {
+    OpDescPtr dst_op_desc = CreateOpDesc("SomeOp", "SomeOpType");
+    dst_op_desc->AddOutputDesc(tensor_desc);
+    dst_op_desc->AddOutputDesc(tensor_desc);
+    auto dst_node = compute_graph->AddNode(dst_op_desc);
+    NodeItem::Create(dst_node, dst_node_item);
+  }
+
+  float buffer[2 * 16] = {};
+  unique_ptr<TensorValue> tensor_value(new TensorValue(buffer, sizeof(buffer)));
+  model.constant_tensors_[const_node_item->node] = std::move(tensor_value);
+
+  // Case 1. connect to Const
+  auto output_id = 1;
+  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(output_id, const_node_item.get());
+  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(0, non_const_node_item.get());
+  dst_node_item->dependents_for_shape_inference.emplace_back(const_node_item->node);
+  dst_node_item->dependents_for_shape_inference.emplace_back(non_const_node_item->node);
+
+  ASSERT_EQ(builder.OptimizeDependenciesForConstantInputs(), SUCCESS);
+  ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1);
+  ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node);
+
+  // Case 2. connect to known-subgraph, netoutput connect to Const
+  builder.host_input_value_dependencies_.clear();
+  dst_node_item->dependents_for_shape_inference.clear();
+
+  builder.known_subgraph_constant_output_refs_[known_node_item.get()].emplace(output_id, const_node_item->node);
+  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(output_id, known_node_item.get());
+  builder.host_input_value_dependencies_[dst_node_item.get()].emplace_back(0, non_const_node_item.get());
+
+  dst_node_item->dependents_for_shape_inference.emplace_back(known_node_item->node);
+  dst_node_item->dependents_for_shape_inference.emplace_back(non_const_node_item->node);
+
+  ASSERT_EQ(builder.OptimizeDependenciesForConstantInputs(), SUCCESS);
+  ASSERT_EQ(dst_node_item->dependents_for_shape_inference.size(), 1);
+  ASSERT_EQ(dst_node_item->dependents_for_shape_inference[0], non_const_node_item->node);
+}