From 279eb010c190540a230eae69f2bcd18fece7d8dc Mon Sep 17 00:00:00 2001 From: zhou_lili Date: Wed, 7 Apr 2021 09:19:26 +0800 Subject: [PATCH] ge code of fuzz build --- ge/CMakeLists.txt | 2 + ge/ge_inference.mk | 1 + ge/ge_runner.mk | 1 + ge/generator/ge_generator.cc | 74 +++++++- ge/graph/manager/graph_manager.cc | 23 ++- ge/graph/manager/graph_manager.h | 1 + .../passes/mark_node_unknown_shape_pass.cc | 99 +++++++++++ .../passes/mark_node_unknown_shape_pass.h | 32 ++++ ge/graph/passes/reshape_recovery_pass.cc | 14 +- ge/hybrid/executor/hybrid_model_executor.cc | 7 +- ge/hybrid/model/node_item.cc | 2 +- .../node_executor/aicore/aicore_op_task.cc | 9 + .../node_executor/aicore/aicore_op_task.h | 2 + ge/offline/main.cc | 12 +- ge/offline/single_op_parser.cc | 18 +- ge/offline/single_op_parser.h | 2 + ge/single_op/single_op.cc | 147 ++++++++++++++-- ge/single_op/single_op.h | 7 +- ge/single_op/single_op_model.cc | 57 +++++- ge/single_op/single_op_model.h | 7 +- ge/single_op/stream_resource.cc | 16 ++ ge/single_op/stream_resource.h | 5 + ge/single_op/task/op_task.cc | 65 +++++-- ge/single_op/task/op_task.h | 12 +- ge/single_op/task/tbe_task_builder.cc | 101 ++++------- ge/single_op/task/tbe_task_builder.h | 1 - inc/framework/generator/ge_generator.h | 6 +- inc/framework/omg/omg_inner_types.h | 1 + tests/ut/ge/CMakeLists.txt | 4 + .../ut/ge/generator/ge_generator_unittest.cc | 29 ++-- .../mark_node_unknown_shape_pass_unittest.cc | 115 ++++++++++++ .../passes/reshape_recovery_pass_unittest.cc | 69 ++++++++ tests/ut/ge/single_op/single_op_unittest.cc | 163 ++++++++++++++++++ 33 files changed, 957 insertions(+), 147 deletions(-) create mode 100644 ge/graph/passes/mark_node_unknown_shape_pass.cc create mode 100644 ge/graph/passes/mark_node_unknown_shape_pass.h create mode 100644 tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc create mode 100644 tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc create mode 100644 tests/ut/ge/single_op/single_op_unittest.cc diff --git a/ge/CMakeLists.txt b/ge/CMakeLists.txt index 87e89a38..d84bb89a 100755 --- a/ge/CMakeLists.txt +++ b/ge/CMakeLists.txt @@ -195,6 +195,7 @@ set(TRAIN_SRC_LIST "graph/passes/atomic_addr_clean_pass.cc" "graph/passes/mark_same_addr_pass.cc" "graph/passes/mark_graph_unknown_status_pass.cc" + "graph/passes/mark_node_unknown_shape_pass.cc" "graph/passes/mark_agnostic_pass.cc" "graph/partition/dynamic_shape_partition.cc" "graph/partition/stage_partition.cc" @@ -509,6 +510,7 @@ set(INFER_SRC_LIST "graph/passes/atomic_addr_clean_pass.cc" "graph/passes/mark_same_addr_pass.cc" "graph/passes/mark_graph_unknown_status_pass.cc" + "graph/passes/mark_node_unknown_shape_pass.cc" "graph/passes/mark_agnostic_pass.cc" "graph/common/omg_util.cc" "graph/common/bcast.cc" diff --git a/ge/ge_inference.mk b/ge/ge_inference.mk index f30ba22a..32fc206d 100755 --- a/ge/ge_inference.mk +++ b/ge/ge_inference.mk @@ -114,6 +114,7 @@ OMG_HOST_SRC_FILES := \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ graph/passes/mark_graph_unknown_status_pass.cc \ + graph/passes/mark_node_unknown_shape_pass.cc \ graph/passes/mark_agnostic_pass.cc \ graph/common/omg_util.cc \ graph/common/bcast.cc \ diff --git a/ge/ge_runner.mk b/ge/ge_runner.mk index 0efcf820..49515fe4 100644 --- a/ge/ge_runner.mk +++ b/ge/ge_runner.mk @@ -114,6 +114,7 @@ LIBGE_LOCAL_SRC_FILES := \ graph/passes/atomic_addr_clean_pass.cc \ graph/passes/mark_same_addr_pass.cc \ graph/passes/mark_graph_unknown_status_pass.cc \ + graph/passes/mark_node_unknown_shape_pass.cc \ graph/passes/mark_agnostic_pass.cc \ graph/partition/dynamic_shape_partition.cc \ graph/partition/stage_partition.cc \ diff --git a/ge/generator/ge_generator.cc b/ge/generator/ge_generator.cc index 14882683..feff7d21 100644 --- a/ge/generator/ge_generator.cc +++ b/ge/generator/ge_generator.cc @@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; const int64_t kDynamicDimValue = -2; const int kDefaultDeviceId = 0; const int kDefaultJobId = 0; +const int32_t kFuzzBuildPattern = 1; std::map engine_type_map{ {ge::ENGINE_SYS, kEngineNameDefault}, @@ -296,13 +297,44 @@ static Status ResetTensorVecShape(const vector &inputs, vectorGetName().c_str()); + GE_CHECK_NOTNULL(ge_root_model->GetRootGraph()); + for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) { + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(node->GetOpDesc()); + GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str()); + node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD); + } + (void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs); + if (!fuzz_build_attrs.empty()) { + GELOGD("%s has split, get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str()); + return SUCCESS; + } else { + GELOGW("%s build with fuzz build pattern, but not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", op_desc->GetName().c_str()); + } + return SUCCESS; +} + +static bool HasShapeRange(const vector &inputs) { + for (const auto &input : inputs) { + vector> shape_range; + (void)input.GetTensorDesc().GetShapeRange(shape_range); + if (!shape_range.empty()) { + GELOGD("Has set shape range."); + return true; + } + } + return false; +} + class GeGenerator::Impl { public: Impl(OmgContext &omg_context) : omg_context_(omg_context) {} ~Impl() = default; Status BuildModel(const Graph &graph, const vector &inputs, GeRootModelPtr &ge_models); - Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model); Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff); @@ -742,7 +774,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, - bool is_offline) { + bool is_offline, int32_t compile_flag) { + GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size()); GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID); impl_->is_offline_ = is_offline; if (!is_offline) { @@ -764,6 +797,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc); GE_CHECK_NOTNULL(op_desc_tmp); + bool fuzz_compile_flag = false; + if (!HasShapeRange(inputs) && compile_flag == kFuzzBuildPattern) { + fuzz_compile_flag = true; + } + if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) { + GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str()); + return FAILED; + } + impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag; + // 1. Create ComputeGraph. string name = ge::CurrentTimeInStr() + "_" + model_file_name; Graph graph; @@ -810,6 +853,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic)); GE_CHK_STATUS_RET_NOLOG( impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic)); + } else if (fuzz_compile_flag) { + GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str()); + (void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag); + GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs; + if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) { + GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str()); + return FAILED; + } + if (!fuzz_build_attrs.empty()) { + GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs), + return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed."); + } + GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); } else { GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs)); } @@ -825,15 +881,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in * @param [in] vector &inputs: Operator input data description information. * @param [in] vector &outputs: Operator output data description information. * @param [in] const string &model_file_name: Offline model filename. + * @param [in] compile_flag: op build flag from atc * @return SUCCESS handle successfully / others handle failed */ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, - const vector &outputs, const string &model_file_name) { + const vector &outputs, const string &model_file_name, + int32_t compile_flag) { ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size()); ModelBufferData model_buff; OpEngineType engine_type = ENGINE_SYS; - Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true); + Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag); GELOGI("Finish build single offline model, status: %u", status); return status; } @@ -850,7 +908,6 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, OpEngineType engine_type, ModelBufferData &model_buff) { @@ -864,7 +921,12 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, OpEngineType engine_type, int32_t compile_flag, ModelBufferData &model_buff) { - return SUCCESS; + ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther); + GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size()); + Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false, + compile_flag); + GELOGI("Finish build single online model, status: %u", status); + return status; } Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector &inputs, diff --git a/ge/graph/manager/graph_manager.cc b/ge/graph/manager/graph_manager.cc index 19679a2a..d866beca 100755 --- a/ge/graph/manager/graph_manager.cc +++ b/ge/graph/manager/graph_manager.cc @@ -61,6 +61,7 @@ #include "graph/passes/iterator_op_pass.h" #include "graph/passes/link_gen_mask_nodes_pass.h" #include "graph/passes/mark_graph_unknown_status_pass.h" +#include "graph/passes/mark_node_unknown_shape_pass.h" #include "graph/passes/merge_pass.h" #include "graph/passes/merge_input_memcpy_pass.h" #include "graph/passes/merge_to_stream_merge_pass.h" @@ -864,6 +865,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); @@ -878,7 +881,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); return ret; @@ -896,6 +899,22 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetAllNodes()) { + OpDescPtr op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag); + if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) { + GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str()); + return FAILED; + } + } + return SUCCESS; +} + Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) { PassManager pass_manager; GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass)); @@ -2487,6 +2506,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) { new (std::nothrow) VariableRefDeleteOpPass)) GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass", new (std::nothrow) CompileNodesPass)) + GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( + "OptimizeStage2::AfterMergePasses::MarkNodeUnknownShapePass", new(std::nothrow) MarkNodeUnknownShapePass)) GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass( "OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass)) GE_CHK_STATUS_RET( diff --git a/ge/graph/manager/graph_manager.h b/ge/graph/manager/graph_manager.h index 661cf9d8..b63b138a 100644 --- a/ge/graph/manager/graph_manager.h +++ b/ge/graph/manager/graph_manager.h @@ -358,6 +358,7 @@ class GraphManager { ComputeGraphPtr &compute_graph, GeRootModelPtr &ge_root_model, uint64_t session_id); + Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph); Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph, Graph2SubGraphInfoList &sub_graph_map, diff --git a/ge/graph/passes/mark_node_unknown_shape_pass.cc b/ge/graph/passes/mark_node_unknown_shape_pass.cc new file mode 100644 index 00000000..c040e846 --- /dev/null +++ b/ge/graph/passes/mark_node_unknown_shape_pass.cc @@ -0,0 +1,99 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/mark_node_unknown_shape_pass.h" +#include "graph/utils/node_utils.h" +#include "graph/debug/ge_attr_define.h" +#include "graph/common/local_context.h" + +namespace ge { +namespace { +const char *const kEngineNameAiCore = "AIcoreEngine"; +const char *const kNeedRefreshShape = "_need_generate"; +const char *const kOriginalNode = "_original_node"; +const int32_t kDynamicState = -2; +} + +Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) { + GE_CHECK_NOTNULL(graph); + if (!GetLocalOmgContext().fuzz_compile_flag) { + return SUCCESS; + } + if (IsAllAicoreSupportDyn(graph)) { + if (UpdateNodeShapeToUnknown(graph) != SUCCESS) { + GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown."); + return FAILED; + } + } + return SUCCESS; +} + +bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) { + bool is_all_aicore_support_dyn = false; + for (const auto &node : graph->GetAllNodes()) { + if (node->GetOpDesc() == nullptr) { + continue; + } + if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) { + GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str()); + continue; + } + NodePtr original_node = nullptr; + original_node = node->GetOpDesc()->TryGetExtAttr(kOriginalNode, original_node); + if ((original_node == nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) || + (original_node != nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS) && + !AttrUtils::HasAttr(original_node->GetOpDesc(), kNeedRefreshShape))) { + GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); + is_all_aicore_support_dyn = true; + } else { + GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str()); + is_all_aicore_support_dyn = false; + break; + } + } + return is_all_aicore_support_dyn; +} + +Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) { + GELOGD("Need to update node shape to dynamic when get fuzz build result."); + for (const auto &node : graph->GetAllNodes()) { + if (NodeUtils::IsConst(*node) || node->GetType() == VARIABLE) { + continue; + } + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast(i)); + if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) { + continue; + } + GELOGD("Update input shape for %s.", node->GetName().c_str()); + auto input_desc = op_desc->MutableInputDesc(static_cast(i)); + if (input_desc != nullptr) { + input_desc->SetShape(GeShape({kDynamicState})); + } + } + + for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { + if (output_desc != nullptr) { + GELOGD("Update output shape for %s.", node->GetName().c_str()); + output_desc->SetShape(GeShape({kDynamicState})); + } + } + } + return SUCCESS; +} +} // namespace ge \ No newline at end of file diff --git a/ge/graph/passes/mark_node_unknown_shape_pass.h b/ge/graph/passes/mark_node_unknown_shape_pass.h new file mode 100644 index 00000000..b78b7826 --- /dev/null +++ b/ge/graph/passes/mark_node_unknown_shape_pass.h @@ -0,0 +1,32 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ +#define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ +#include "graph/graph.h" +#include "inc/graph_pass.h" + +namespace ge { +class MarkNodeUnknownShapePass : public GraphPass { +public: + Status Run(ComputeGraphPtr graph); + +private: + bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph); + Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph); +}; +} // namespace ge +#endif // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_ diff --git a/ge/graph/passes/reshape_recovery_pass.cc b/ge/graph/passes/reshape_recovery_pass.cc index f0987ff5..84050e87 100644 --- a/ge/graph/passes/reshape_recovery_pass.cc +++ b/ge/graph/passes/reshape_recovery_pass.cc @@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) { GE_CHECK_NOTNULL(dst_node->GetOpDesc()); auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx()); GE_CHECK_NOTNULL(dst_tensor); - bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK && - dst_tensor->GetShape().GetDims() != UNKNOWN_RANK && - src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims(); + bool is_dynamic = false; + const auto &src_tensor_dims = src_tensor->GetShape().GetDims(); + const auto &dst_tensor_dims = dst_tensor->GetShape().GetDims(); + if ((std::any_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val < 0 ; })) + || (std::any_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val < 0; }))) { + GELOGD("No need to insert reshape node between %s nad %s.", node->GetName().c_str(), + dst_node->GetName().c_str()); + is_dynamic = true; + } + bool is_need_insert_reshape = src_tensor_dims != dst_tensor_dims && + !is_dynamic; if (is_need_insert_reshape) { auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph()); GE_CHECK_NOTNULL(reshape); diff --git a/ge/hybrid/executor/hybrid_model_executor.cc b/ge/hybrid/executor/hybrid_model_executor.cc index 85b2e9ac..4a8a0af0 100755 --- a/ge/hybrid/executor/hybrid_model_executor.cc +++ b/ge/hybrid/executor/hybrid_model_executor.cc @@ -54,6 +54,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) { "[%s] check input node shape by shape range failed.", root_graph_item->GetName().c_str()); } + if (context_.global_step != nullptr) { GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream)); @@ -100,8 +101,10 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor, GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id)); } - HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); - RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); + if (!model_->IsSingleOp()) { + HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph."); + RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End"); + } args.outputs.clear(); HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs"); diff --git a/ge/hybrid/model/node_item.cc b/ge/hybrid/model/node_item.cc index f14e9a21..ef43d09f 100644 --- a/ge/hybrid/model/node_item.cc +++ b/ge/hybrid/model/node_item.cc @@ -168,7 +168,7 @@ Status NodeItem::InitInputsAndOutputs() { Status NodeItem::ResolveDynamicState() { (void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic); - GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic); + GELOGD("Node name is %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic); if (!is_dynamic) { GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic), "[%s] Failed to get shape status.", diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.cc b/ge/hybrid/node_executor/aicore/aicore_op_task.cc index 6f9a5a52..9bfbe47f 100644 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.cc +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.cc @@ -22,6 +22,7 @@ #include "hybrid/node_executor/aicore/aicore_task_builder.h" #include "graph/load/model_manager/tbe_handle_store.h" #include "graph/types.h" +#include "single_op/task/build_task_utils.h" using optiling::OpRunInfo; @@ -31,6 +32,7 @@ namespace { constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; constexpr char const *kAttrOpParamSize = "op_para_size"; constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size"; +std::atomic log_id(0); } // namespace TbeHandleHolder::TbeHandleHolder(void *bin_handle) @@ -48,6 +50,12 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr &&holder) { } Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) { + log_name_ = op_desc.GetName() + "_tvmbin"; + log_id_ = log_id++; + auto op_desc_ptr = MakeShared(op_desc); + GE_CHECK_NOTNULL(op_desc_ptr); + auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_ptr); + GELOGI("[TASK_INFO] %lu/%s %s.", log_id_, log_name_.c_str(), task_info.c_str()); GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def)); GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc)); @@ -67,6 +75,7 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) output_indices_to_skip_.push_back(i); } } + GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str()); return SUCCESS; } diff --git a/ge/hybrid/node_executor/aicore/aicore_op_task.h b/ge/hybrid/node_executor/aicore/aicore_op_task.h index f7d0854f..fe18bfd0 100755 --- a/ge/hybrid/node_executor/aicore/aicore_op_task.h +++ b/ge/hybrid/node_executor/aicore/aicore_op_task.h @@ -114,6 +114,8 @@ class AiCoreOpTask { uint32_t tiling_key_ = 0; void *handle_ = nullptr; bool is_dynamic_ = false; + uint64_t log_id_ = 0; + std::string log_name_; }; class AtomicAddrCleanOpTask : public AiCoreOpTask { diff --git a/ge/offline/main.cc b/ge/offline/main.cc index 28d16a79..54a1d8fb 100755 --- a/ge/offline/main.cc +++ b/ge/offline/main.cc @@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path"); DEFINE_string(display_model_info, "0", "Optional; display model info"); +DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance." + "normal: no need to compile, used saved .o files directly;" + "high: need to recompile, high execute performance mode."); + class GFlagUtils { public: /** @@ -330,7 +334,8 @@ class GFlagUtils { "Default value: $HOME/atc_data\n" " --op_compiler_cache_mode Set the operator compilation cache mode." "Options are disable(default), enable and force(force to refresh the cache)\n" - " --display_model_info enable for display model info; 0(default): close display, 1: open display"); + " --display_model_info enable for display model info; 0(default): close display, 1: open display.\n" + " --performance_mode Set high performance mode of compile or execute."); gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); // Using gflags to analyze input parameters @@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map &options) { options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode); options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path); options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path); + options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode); } domi::Status GenerateSingleOp(const std::string& json_file_path) { @@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) { output_path = FLAGS_output + "/"; } output_path += param.file_name; - ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path); + ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag); if (ret != SUCCESS) { DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index); ret = domi::FAILED; @@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() { options.insert(std::pair(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path)); options.insert(std::pair(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info)); + + options.insert(std::pair(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode)); // set enable scope fusion passes SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes); // print atc option map diff --git a/ge/offline/single_op_parser.cc b/ge/offline/single_op_parser.cc index 2fa0a043..ce9448d5 100644 --- a/ge/offline/single_op_parser.cc +++ b/ge/offline/single_op_parser.cc @@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format"; constexpr char const *kFileSuffix = ".om"; constexpr char const *kKeyDynamicInput = "dynamic_input"; constexpr char const *kKeyDynamicOutput = "dynamic_output"; +constexpr char const *kKeyCompileFlag = "compile_flag"; constexpr int kDumpJsonIndent = 2; constexpr int kShapeRangePairSize = 2; constexpr int kShapeRangeLow = 0; @@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) { } void from_json(const Json &j, SingleOpDesc &desc) { - desc.op = j.at(kKeyOp).get(); + auto op = j.find(kKeyOp); + if (op != j.end()) { + desc.op = j.at(kKeyOp).get(); + } auto input_desc = j.find(kKeyInputDesc); if (input_desc != j.end()) { @@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) { if (attr_field != j.end()) { desc.attrs = attr_field->get>(); } + + auto compile_flag = j.find(kKeyCompileFlag); + if (compile_flag != j.end()) { + desc.compile_flag = compile_flag->get(); + } } Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) { @@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector input_desc; std::vector output_desc; std::vector attrs; + int32_t compile_flag = 0; }; struct SingleOpBuildParam { @@ -62,6 +63,7 @@ struct SingleOpBuildParam { std::vector inputs; std::vector outputs; std::string file_name; + int32_t compile_flag = 0; }; void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc); diff --git a/ge/single_op/single_op.cc b/ge/single_op/single_op.cc index f3f0b647..c305eea9 100755 --- a/ge/single_op/single_op.cc +++ b/ge/single_op/single_op.cc @@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32; const size_t kDataMemAlignUnit = 2; const string kShapeTypeDynamic = "dynamic"; const string kShapeTypeStatic = "static"; +const int64_t kHostMemType = 1; +const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; +const uint32_t kAlignBytes = 512; size_t GetAlignedSize(size_t size) { size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize; @@ -65,6 +68,72 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) { profiling_manager.ReportProfilingData(model_id, task_desc_info); return SUCCESS; } + +Status CalInputsHostMemSize(const std::vector &inputs, + std::vector> &inputs_size) { + int64_t total_size = 0; + size_t index = 0; + for (auto &input_buffer : inputs) { + int64_t input_size = 0; + if (input_buffer.placement == kHostMemType) { + GE_CHECK_LE(input_buffer.length, INT64_MAX); + input_size = input_buffer.length; + // input_size pad to 512 + GE_CHK_STATUS_RET(CheckInt64AddOverflow(input_size, (kAlignBytes - 1)), "Padding size is beyond the INT64_MAX."); + input_size = ((input_size + kAlignBytes - 1) / kAlignBytes) * kAlignBytes; + inputs_size.emplace_back(index, input_size); + GE_CHK_STATUS_RET(CheckInt64AddOverflow(total_size, input_size), "Total size is beyond the INT64_MAX."); + total_size += input_size; + GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size); + } + index++; + } + if (total_size > kFuzzDeviceBufferSize) { + GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size); + return FAILED; + } + return SUCCESS; +} + +Status UpdateInputsBufferAddr(StreamResource *stream_resource, rtStream_t stream, + const std::vector> &inputs_size, + std::vector &update_buffers) { + GE_CHECK_NOTNULL(stream_resource); + if (stream_resource->Init() != SUCCESS) { + GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer."); + return FAILED; + } + auto dst_addr = reinterpret_cast(stream_resource->GetDeviceBufferAddr()); + // copy host mem from input_buffer to device mem of dst_addr + for (const auto &input_size : inputs_size) { + size_t index = input_size.first; + auto size = input_size.second; + GELOGD("Do H2D for %zu input, dst size is %zu, src length is %lu.", index, size, update_buffers[index].length); + GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length, + RT_MEMCPY_HOST_TO_DEVICE_EX, stream)); + update_buffers[index].data = dst_addr; + dst_addr = reinterpret_cast(dst_addr + size); + } + return SUCCESS; +} + +Status InitHybridModelArgs(const std::vector &input_buffers, + const std::vector &output_buffers, + const std::vector &inputs_desc, + hybrid::HybridModelExecutor::ExecuteArgs &args) { + for (auto &input : input_buffers) { + args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); + } + for (auto &output : output_buffers) { + args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); + } + for (auto &tensor_desc : inputs_desc) { + auto desc = MakeShared(tensor_desc); + GE_CHECK_NOTNULL(desc); + args.input_desc.emplace_back(desc); + } + return SUCCESS; +} } // namespace SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream) @@ -168,13 +237,28 @@ Status SingleOp::UpdateArgs(const std::vector &inputs, const std::ve FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector &inputs, const std::vector &outputs) { + GELOGD("Start SingleOp::ExecuteAsync."); Status ret = ValidateArgs(inputs, outputs); if (ret != SUCCESS) { return ret; } GE_CHECK_NOTNULL(stream_resource_); + vector> inputs_size; + GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size)); std::lock_guard lk(*stream_mutex_); + vector update_buffers = inputs; + if (!inputs_size.empty()) { + GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource_, stream_, inputs_size, update_buffers)); + } + + if (hybrid_model_executor_ != nullptr) { + GELOGD("Execute multi-task single op by hybrid model executor"); + hybrid::HybridModelExecutor::ExecuteArgs args; + GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, outputs, inputs_desc_, args)); + return hybrid_model_executor_->Execute(args); + } + auto current_mem_base = stream_resource_->GetMemoryBase(); if (running_param_->mem_base != current_mem_base) { running_param_->mem_base = const_cast(current_mem_base); @@ -185,7 +269,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c task->GetOpdesc()->GetName().c_str()); } } - ret = UpdateArgs(inputs, outputs); + ret = UpdateArgs(update_buffers, outputs); if (ret != SUCCESS) { return ret; } @@ -252,33 +336,64 @@ Status DynamicSingleOp::ValidateParams(const vector &input_desc, return SUCCESS; } +Status DynamicSingleOp::SetHostTensorValue(const std::vector> &inputs_size, + const vector &input_desc, + const std::vector &input_buffers) { + auto op_desc = op_task_->GetOpdesc(); + GE_CHECK_NOTNULL(op_desc); + GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str()); + for (const auto &input_size : inputs_size) { + size_t index = input_size.first; + auto ge_tensor_desc = input_desc.at(index); + // reconstruct GeTensor by DataBuffer + GeTensorPtr ge_tensor = MakeShared(ge_tensor_desc); + GE_CHECK_NOTNULL(ge_tensor); + GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.", + index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length); + if (ge_tensor->SetData(reinterpret_cast(input_buffers[index].data), + static_cast(input_buffers[index].length)) != SUCCESS) { + GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor."); + return INTERNAL_ERROR; + } + auto tensor_desc = op_desc->MutableInputDesc(index); + GE_CHECK_NOTNULL(tensor_desc); + if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) { + GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str()); + return FAILED; + } + } + return SUCCESS; +} + Status DynamicSingleOp::ExecuteAsync(const vector &input_desc, const vector &input_buffers, vector &output_desc, vector &output_buffers) { + GELOGD("Start DynamicSingleOp::ExecuteAsync."); GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers)); + vector> inputs_size; + GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_buffers, inputs_size)); + vector update_buffers = input_buffers; + std::lock_guard lk(*stream_mutex_); + if (!inputs_size.empty()) { + StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_); + GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers)); + } + if (hybrid_model_executor_ != nullptr) { GELOGD("Execute multi-task dynamic single op by hybrid model executor"); hybrid::HybridModelExecutor::ExecuteArgs args; - for (auto &input : input_buffers) { - args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length)); - } - for (auto &output : output_buffers) { - args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length)); - } - for (auto &tensor_desc : input_desc) { - auto desc = MakeShared(tensor_desc); - GE_CHECK_NOTNULL(desc); - args.input_desc.emplace_back(desc); - } + GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args)); return hybrid_model_executor_->Execute(args); } - - std::lock_guard lk(*stream_mutex_); GE_CHECK_NOTNULL(op_task_); - - GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); + if (!inputs_size.empty()) { + GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(inputs_size, input_desc, input_buffers)); + GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_)); + } else { + GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_)); + } GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_)); GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic)); return SUCCESS; diff --git a/ge/single_op/single_op.h b/ge/single_op/single_op.h index b350b684..01d6dfc0 100755 --- a/ge/single_op/single_op.h +++ b/ge/single_op/single_op.h @@ -59,6 +59,9 @@ class SingleOp { std::vector tasks_; std::vector> arg_table_; std::unique_ptr running_param_; + std::unique_ptr hybrid_model_; + std::unique_ptr hybrid_model_executor_; + std::vector inputs_desc_; }; class DynamicSingleOp { @@ -76,7 +79,8 @@ class DynamicSingleOp { const std::vector &inputs, std::vector &output_desc, std::vector &outputs) const; - + Status SetHostTensorValue(const std::vector> &inputs_size, + const vector &input_desc, const std::vector &input_buffers); std::unique_ptr op_task_; std::unique_ptr hybrid_model_; std::unique_ptr hybrid_model_executor_; @@ -85,6 +89,7 @@ class DynamicSingleOp { rtStream_t stream_ = nullptr; size_t num_inputs_ = 0; size_t num_outputs_ = 0; + ComputeGraphPtr compute_graph_; }; } // namespace ge #endif // GE_SINGLE_OP_SINGLE_OP_H_ diff --git a/ge/single_op/single_op_model.cc b/ge/single_op/single_op_model.cc index 3c2b7cc3..d2f8062a 100755 --- a/ge/single_op/single_op_model.cc +++ b/ge/single_op/single_op_model.cc @@ -43,6 +43,8 @@ using std::vector; namespace ge { namespace { const size_t kDataOutputNum = 1; +const uint32_t kOutputIndexOfData = 0; +constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape"; Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); @@ -51,7 +53,9 @@ Status IfInferDepend(GeModelPtr &ge_model, bool &flag) { auto op_desc = node->GetOpDesc(); GE_CHECK_NOTNULL(op_desc); const auto &depends = op_desc->GetOpInferDepends(); - if (!depends.empty()) { + bool support_dynamic_shape = false; + (void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape); + if (!depends.empty() && support_dynamic_shape) { flag = true; return SUCCESS; } @@ -462,6 +466,31 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa *task = aicpucc_task.release(); return SUCCESS; } +Status SingleOpModel::InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, + SingleOp &single_op) { + for (const auto &op_desc : data_ops_) { + auto output_tensor_desc = op_desc->GetOutputDesc(kOutputIndexOfData); + GeTensorDesc tensor_desc(output_tensor_desc); + single_op.inputs_desc_.emplace_back(tensor_desc); + GELOGD("Init inputs desc from %s.", op_desc->GetName().c_str()); + } + GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized()); + auto root_model = model_helper_.GetGeRootModel(); + GE_CHECK_NOTNULL(root_model); + root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph())); + root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model); + single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); + GE_CHECK_NOTNULL(single_op.hybrid_model_); + GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed."); + int32_t device_id = 0; + GE_CHK_RT_RET(rtGetDevice(&device_id)); + single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), + device_id, + resource.GetStream())); + GE_CHECK_NOTNULL(single_op.hybrid_model_executor_); + GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed."); + return SUCCESS; +} Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs()); @@ -469,10 +498,20 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) { single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_)); GE_CHECK_NOTNULL(single_op.running_param_); GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op)); + auto ge_model = model_helper_.GetGeModel(); + GE_CHECK_NOTNULL(ge_model); + bool infer_depend_flag = false; + GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed."); + if (infer_depend_flag) { + // construct single_op, do single op with HybridModelExecutor + GELOGD("Init hybrid model params of single op, and will do execute with hybrid model executor."); + return InitHybridModelExecutor(resource, ge_model, single_op); + } return BuildTaskList(&resource, single_op); } -Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) { +Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def, + DynamicSingleOp &single_op) { auto task_type = static_cast(task_def.type()); const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() : task_def.kernel_with_handle().context(); @@ -483,6 +522,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl TbeOpTask *tbe_task = nullptr; GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task)); tbe_task->SetModelArgs(model_name_, model_id_); + if (tbe_task->tiling_buffer_ != nullptr) { + GELOGD("tiling buffer is not nullptr."); + tbe_task->stream_resource_ = stream_resource; + } single_op.op_task_.reset(tbe_task); } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) { GELOGD("Building AICPU_CC task"); @@ -504,10 +547,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl return SUCCESS; } -Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { +Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) { auto ge_model = model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); + auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph()); + GE_CHECK_NOTNULL(compute_graph); + single_op.compute_graph_ = compute_graph; auto tasks = ge_model->GetModelTaskDefPtr()->task(); for (int i = 0; i < tasks.size(); ++i) { const TaskDef &task_def = tasks[i]; @@ -521,7 +567,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { "BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks."); return ACL_ERROR_GE_OP_TASK_TYPE_INVALID; } - GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op)); + GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op)); } else if (task_type == RT_MODEL_TASK_KERNEL_EX) { if (single_op.op_task_ != nullptr) { GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks."); @@ -561,6 +607,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & single_op.num_outputs_ = netoutput_op_->GetAllInputsSize(); GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource)); model_params_.memory_size = UINT_MAX; + model_params_.graph_is_dynamic = true; auto ge_model = model_helper_.GetGeModel(); GE_CHECK_NOTNULL(ge_model); @@ -585,6 +632,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp & GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed."); return SUCCESS; } - return BuildTaskListForDynamicOp(single_op); + return BuildTaskListForDynamicOp(&resource, single_op); } } // namespace ge diff --git a/ge/single_op/single_op_model.h b/ge/single_op/single_op_model.h index b1a7d3ea..d900f09f 100755 --- a/ge/single_op/single_op_model.h +++ b/ge/single_op/single_op_model.h @@ -40,6 +40,7 @@ struct SingleOpModelParam { std::map addr_mapping_; int64_t core_type = 0; + bool graph_is_dynamic = false; }; class SingleOpModel { @@ -65,15 +66,17 @@ class SingleOpModel { void ParseOutputNode(const OpDescPtr &op_desc); Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op); - Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op); + Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op); Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task); Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task, bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id); Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id); - Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op); + Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def, + DynamicSingleOp &single_op); static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam ¶m); void ParseArgTable(OpTask *task, SingleOp &op); + Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op); std::string model_name_; uint32_t model_id_ = 0; diff --git a/ge/single_op/stream_resource.cc b/ge/single_op/stream_resource.cc index 5f009f63..e7049297 100755 --- a/ge/single_op/stream_resource.cc +++ b/ge/single_op/stream_resource.cc @@ -22,6 +22,11 @@ #include "single_op/single_op_model.h" namespace ge { +namespace { +// limit available device mem size 1M +const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024; +} + StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) { } @@ -39,6 +44,17 @@ StreamResource::~StreamResource() { GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); } } + + if (device_buffer_ != nullptr) { + auto rt_ret = rtFree(device_buffer_); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed.")); + } +} + +Status StreamResource::Init() { + auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM); + GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Malloc][Rt] failed.")); + return SUCCESS; } SingleOp *StreamResource::GetOperator(const uint64_t key) { diff --git a/ge/single_op/stream_resource.h b/ge/single_op/stream_resource.h index 73a6231b..aecb38c8 100755 --- a/ge/single_op/stream_resource.h +++ b/ge/single_op/stream_resource.h @@ -40,6 +40,7 @@ class StreamResource { rtStream_t GetStream() const; void SetStream(rtStream_t stream); + Status Init(); SingleOp *GetOperator(const uint64_t key); DynamicSingleOp *GetDynamicOperator(const uint64_t key); @@ -49,6 +50,9 @@ class StreamResource { uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true); uint8_t *MallocWeight(const std::string &purpose, size_t size); const uint8_t *GetMemoryBase() const; + void *GetDeviceBufferAddr() const { + return device_buffer_; + } private: uint8_t *DoMallocMemory(const std::string &purpose, @@ -65,6 +69,7 @@ class StreamResource { rtStream_t stream_ = nullptr; std::mutex mu_; std::mutex stream_mu_; + void *device_buffer_ = nullptr; }; } // namespace ge diff --git a/ge/single_op/task/op_task.cc b/ge/single_op/task/op_task.cc index 2a580c7e..bce52335 100755 --- a/ge/single_op/task/op_task.cc +++ b/ge/single_op/task/op_task.cc @@ -137,7 +137,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id return SUCCESS; } -Status OpTask::UpdateRunInfo(const vector &input_desc, const vector &output_desc) { +Status OpTask::UpdateRunInfo() { return UNSUPPORTED; } @@ -200,14 +200,14 @@ void TbeOpTask::SetHandle(void *handle) { Status TbeOpTask::LaunchKernel(rtStream_t stream) { GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_); - auto *sm_desc = reinterpret_cast(sm_desc_); - auto ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast(arg_size_), sm_desc, stream); + auto ret = DoLaunchKernel(stream); + int retry_times = 0; while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) { retry_times++; GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times); std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime)); - ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, sm_desc, stream); + ret = DoLaunchKernel(stream); } if (ret != RT_ERROR_NONE) { @@ -220,8 +220,7 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) { return SUCCESS; } -Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const vector &output_desc) { - GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); +Status TbeOpTask::UpdateRunInfo() { // invoke OpParaCalculate GELOGD("Start to invoke OpParaCalculate."); optiling::OpRunInfo run_info; @@ -235,10 +234,9 @@ Status TbeOpTask::UpdateRunInfo(const vector &input_desc, const ve block_dim_ = run_info.block_dim; tiling_data_ = run_info.tiling_data.str(); tiling_key_ = run_info.tiling_key; + run_info_workspaces_ = run_info.workspaces; GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_, tiling_data_.size(), tiling_key_); - - GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "[Allocate][Workspaces] failed."); return SUCCESS; } @@ -288,14 +286,33 @@ Status TbeOpTask::UpdateNodeByShape(const vector &input_desc, cons return SUCCESS; } -void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size) { +Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) { + if (tiling_buffer != nullptr) { + uintptr_t *arg_base = nullptr; + size_t arg_num = 0; + GetIoAddr(arg_base, arg_num); + GE_CHECK_NOTNULL(node); + GE_CHECK_NOTNULL(node->GetOpDesc()); + uint32_t inputs_num = node->GetOpDesc()->GetInputsSize(); + uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize(); + uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size(); + uint32_t tiling_index = inputs_num + outputs_num + workspace_nums; + if (arg_num == 0 || arg_num < tiling_index) { + GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.", + tiling_index, arg_num); + return ACL_ERROR_GE_INTERNAL_ERROR; + } + arg_base[tiling_index] = reinterpret_cast(tiling_buffer); + } node_ = node; tiling_buffer_ = tiling_buffer; max_tiling_size_ = max_tiling_size; + return SUCCESS; } Status TbeOpTask::AllocateWorkspaces(const vector &workspace_sizes) { static const std::string kPurpose("malloc workspace memory for dynamic op."); + workspaces_.clear(); if (workspace_sizes.empty()) { GELOGD("No need to allocate workspace."); return SUCCESS; @@ -333,8 +350,10 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, vector &output_desc, vector &output_buffers, rtStream_t stream) { - GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc)); GELOGD("[%s] Start to launch kernel", node_->GetName().c_str()); + GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc)); + GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo()); + GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed."); std::vector args; for (auto &buffer : input_buffers) { args.emplace_back(buffer.data); @@ -354,6 +373,15 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, args.emplace_back(tiling_buffer_); } + GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *)); + // node with workspace: build can not get size of workspace, need to update arg_size_ when execute + if (arg_size_ < (args.size() * sizeof(void *))) { + size_t temp_size = args.size() * sizeof(void *); + GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size); + args_.reset(new(std::nothrow) uint8_t[temp_size]()); + GE_CHECK_NOTNULL(args_); + arg_size_ = temp_size; + } if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) { GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str()); REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str()); @@ -361,17 +389,22 @@ Status TbeOpTask::LaunchKernel(const vector &input_desc, } GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str()); + GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel."); + + return SUCCESS; +} + +Status TbeOpTask::DoLaunchKernel(rtStream_t stream) { + auto *sm_desc = reinterpret_cast(sm_desc_); if (handle_ == nullptr) { - GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream)); - GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str()); + GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast(arg_size_), + sm_desc, stream)); } else { std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_); std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_); - GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr, - stream, kernel_info.c_str())); - GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str()); + GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), + static_cast(arg_size_), sm_desc, stream, kernel_info.c_str())); } - return SUCCESS; } diff --git a/ge/single_op/task/op_task.h b/ge/single_op/task/op_task.h index 8c91bd5f..0c64ecb4 100644 --- a/ge/single_op/task/op_task.h +++ b/ge/single_op/task/op_task.h @@ -30,6 +30,7 @@ #include "cce/aicpu_engine_struct.h" #include "hybrid/node_executor/aicpu/aicpu_ext_info.h" #include "init/gelib.h" +#include "register/op_tiling.h" namespace ge { class StreamResource; @@ -39,8 +40,7 @@ class OpTask { OpTask() = default; virtual ~OpTask() = default; virtual Status LaunchKernel(rtStream_t stream) = 0; - virtual Status UpdateRunInfo(const vector &input_desc, - const vector &output_desc); + virtual Status UpdateRunInfo(); virtual Status UpdateArgTable(const SingleOpModelParam ¶m); void SetModelArgs(std::string model_name, uint32_t model_id); Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id); @@ -81,22 +81,23 @@ class TbeOpTask : public OpTask { void SetKernelWithHandleArgs(std::unique_ptr &&args, size_t arg_size, uint32_t block_dim, const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle); - Status UpdateRunInfo(const vector &input_desc, - const vector &output_desc) override; + Status UpdateRunInfo() override; const void *GetArgs() const; size_t GetArgSize() const; const std::string &GetStubName() const; - void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size); + Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size); const std::string &GetTaskType() const override; void SetHandle(void *handle); private: friend class SingleOpModel; + friend class TbeTaskBuilder; static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor); Status UpdateNodeByShape(const vector &input_desc, const vector &output_desc); Status AllocateWorkspaces(const std::vector &workspace_sizes); + Status DoLaunchKernel(rtStream_t stream); const void *stub_func_ = nullptr; std::unique_ptr args_; @@ -108,6 +109,7 @@ class TbeOpTask : public OpTask { void *tiling_buffer_ = nullptr; uint32_t max_tiling_size_ = 0; std::string tiling_data_; + std::vector run_info_workspaces_; std::vector workspaces_; NodePtr node_; diff --git a/ge/single_op/task/tbe_task_builder.cc b/ge/single_op/task/tbe_task_builder.cc index 177f42f8..c7ff13d1 100644 --- a/ge/single_op/task/tbe_task_builder.cc +++ b/ge/single_op/task/tbe_task_builder.cc @@ -308,92 +308,65 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m } Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc) { - size_t arg_size = kernel_def_.args_size(); - auto args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); - GE_CHECK_NOTNULL(args); - - auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy failed, size = %zu, ret = %d", - arg_size, static_cast(rt_ret)); - REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast(rt_ret)); - return RT_ERROR_TO_GE_STATUS(rt_ret); + auto task_type = static_cast(task_def_.type()); + bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL); + size_t arg_size = 0; + std::unique_ptr args = nullptr; + if (is_task_all_kernel) { + GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_ALL_KERNEL.", op_desc->GetName().c_str()); + arg_size = kernel_def_with_handle_.args_size(); + args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); + GE_CHECK_NOTNULL(args); + GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, + RT_MEMCPY_HOST_TO_HOST)) + } else { + GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_KERNEL.", op_desc->GetName().c_str()); + arg_size = kernel_def_.args_size(); + args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); + GE_CHECK_NOTNULL(args); + GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST)) } - const domi::KernelContext &context = kernel_def_.context(); + const domi::KernelContext &context = task_type == RT_MODEL_TASK_ALL_KERNEL ? + kernel_def_with_handle_.context() : kernel_def_.context(); const auto *args_offset_tmp = reinterpret_cast(context.args_offset().data()); uint16_t offset = *args_offset_tmp; - bool is_dynamic = false; - (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); - if (is_dynamic) { - GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); - } else { - // copy args - std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); - void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); - uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); - rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast(rt_ret)); - REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast(rt_ret)); - return RT_ERROR_TO_GE_STATUS(rt_ret); - } - } - task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); + // copy args + std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); + void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); + uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); + GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST)); - return SUCCESS; -} - -Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, - const OpDescPtr &op_desc) { - size_t arg_size = kernel_def_with_handle_.args_size(); - auto args = std::unique_ptr(new (std::nothrow) uint8_t[arg_size]); - GE_CHECK_NOTNULL(args); - - auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "[Update][Kernel_def:args]rtMemcpy failed, size = %zu, ret = %d", - arg_size, static_cast(rt_ret)); - REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast(rt_ret)); - return rt_ret; + if (is_task_all_kernel) { + task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, + kernel_def_with_handle_); + } else { + task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc); } - const domi::KernelContext &context = kernel_def_with_handle_.context(); - const auto *args_offset_tmp = reinterpret_cast(context.args_offset().data()); - uint16_t offset = *args_offset_tmp; - bool is_dynamic = false; (void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic); if (is_dynamic) { GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task)); - } else { - // copy args - std::vector tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param); - void *src_addr = reinterpret_cast(tensor_device_addr_vec.data()); - uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size(); - rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST); - if (rt_ret != RT_ERROR_NONE) { - GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast(rt_ret)); - REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast(rt_ret)); - return rt_ret; + if (!param.graph_is_dynamic && task.tiling_buffer_ != nullptr) { + GELOGD("Need to update run info when graph is static with dynamic node: %s.", op_desc->GetName().c_str()); + task.UpdateRunInfo(); + GE_CHK_RT_RET(rtMemcpy(task.tiling_buffer_, task.max_tiling_size_, task.tiling_data_.data(), + task.tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE)); } } - task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc, - kernel_def_with_handle_); - return SUCCESS; } Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam ¶m) { GELOGD("Build tbe task begin"); - auto task_type = static_cast(task_def_.type()); - auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) : - SetKernelArgs(task, param, op_desc_); + auto ret = SetKernelArgs(task, param, op_desc_); if (ret != SUCCESS) { return ret; } + auto task_type = static_cast(task_def_.type()); ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) : RegisterKernel(task, param); task.SetHandle(handle_); @@ -437,7 +410,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) { GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size); } - task.EnableDynamicSupport(node_, tiling_buffer, static_cast(max_size)); + task.EnableDynamicSupport(node_, tiling_buffer, static_cast(max_size)); return SUCCESS; } } // namespace ge diff --git a/ge/single_op/task/tbe_task_builder.h b/ge/single_op/task/tbe_task_builder.h index 8af9a68d..a202cbf1 100755 --- a/ge/single_op/task/tbe_task_builder.h +++ b/ge/single_op/task/tbe_task_builder.h @@ -97,7 +97,6 @@ class TbeTaskBuilder { private: Status InitTilingInfo(TbeOpTask &task); Status SetKernelArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); - Status SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam ¶m, const OpDescPtr &op_desc); Status GetSmDesc(void **sm_desc, const SingleOpModelParam ¶m) const; Status RegisterKernel(TbeOpTask &task, const SingleOpModelParam ¶m); diff --git a/inc/framework/generator/ge_generator.h b/inc/framework/generator/ge_generator.h index db3b2039..24f969dd 100644 --- a/inc/framework/generator/ge_generator.h +++ b/inc/framework/generator/ge_generator.h @@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator { /// @param [in] inputs: input tensors. /// @param [in] outputs: output tensors. /// @param [in] model_file_name: name of model file. + /// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1 /// @return SUCCESS or FAILED /// Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector &inputs, - const std::vector &outputs, const std::string &model_file_name); + const std::vector &outputs, const std::string &model_file_name, + int32_t compile_flag = 0); /// /// @ingroup ge /// @brief: Build single Op into model buff. @@ -100,7 +102,7 @@ class GE_FUNC_VISIBILITY GeGenerator { ge::ModelBufferData &model, bool is_offline = true); Status BuildSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs, const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff, - bool is_offline = true); + bool is_offline = true, int32_t compile_flag = 0); bool CheckNoAicore(const ComputeGraphPtr &graph); void RemoveConst(const vector &inputs, vector &outputs); Status CheckForSingleOp(OpDescPtr &op_desc, const vector &inputs, const vector &outputs); diff --git a/inc/framework/omg/omg_inner_types.h b/inc/framework/omg/omg_inner_types.h index 54c9ab4a..84f6ef46 100644 --- a/inc/framework/omg/omg_inner_types.h +++ b/inc/framework/omg/omg_inner_types.h @@ -123,6 +123,7 @@ struct OmgContext { bool need_multi_batch = false; std::vector data_nodes; std::vector getnext_nosink_nodes; + bool fuzz_compile_flag = false; }; } // namespace ge diff --git a/tests/ut/ge/CMakeLists.txt b/tests/ut/ge/CMakeLists.txt index 0bc9a6e1..93d5f154 100755 --- a/tests/ut/ge/CMakeLists.txt +++ b/tests/ut/ge/CMakeLists.txt @@ -278,6 +278,7 @@ set(COMMON_SRC_FILES "${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc" "${GE_CODE_DIR}/ge/graph/passes/buffer_pool_memory_pass.cc" + "${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc" "${GE_CODE_DIR}/ge/model/ge_model.cc" "${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc" "${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc" @@ -708,6 +709,8 @@ set(PASS_TEST_FILES "graph/passes/transpose_transdata_pass_unittest.cc" "graph/passes/parallel_group_pass_unittest.cc" "graph/passes/buffer_pool_memory_pass_unittest.cc" + "graph/passes/mark_node_unknown_shape_pass_unittest.cc" + "graph/passes/reshape_recovery_pass_unittest.cc" ) set(KERNEL_TEST_FILES @@ -799,6 +802,7 @@ set(SINGLE_OP_TEST_FILES "single_op/single_op_manager_unittest.cc" "single_op/stream_resource_unittest.cc" "single_op/single_op_task_unittest.cc" + "single_op/single_op_unittest.cc" ) set(PROFILING_MNG_TEST_FILES diff --git a/tests/ut/ge/generator/ge_generator_unittest.cc b/tests/ut/ge/generator/ge_generator_unittest.cc index fef90ee5..fb256c7c 100644 --- a/tests/ut/ge/generator/ge_generator_unittest.cc +++ b/tests/ut/ge/generator/ge_generator_unittest.cc @@ -45,6 +45,15 @@ ComputeGraphPtr MakeGraph() { builder.AddDataEdge(data, 0, addn1, 0); return builder.GetGraph(); } + +static GeAttrValue::NamedAttrs CreateNamedAttrs(const string &name, std::map map) { + GeAttrValue::NamedAttrs named_attrs; + named_attrs.SetName(name); + for (auto it : map) { + named_attrs.SetAttr(it.first, it.second); + } + return named_attrs; +} } // namespace /* @@ -85,25 +94,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) { GeGenerator generator; generator.Initialize({}); ModelBufferData model_buffer; - EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED); -} - -TEST_F(UtestGeGenerator, test_singleop_fuzz_build) { - GeTensorDesc tensor_desc; - shared_ptr op_desc = make_shared("Add", "add"); - op_desc->AddInputDesc(tensor_desc); - op_desc->AddInputDesc(tensor_desc); - op_desc->AddOutputDesc(tensor_desc); - - GeTensor tensor(tensor_desc); - const vector inputs = { tensor, tensor }; - const vector outputs = { tensor }; - - GeGenerator generator; - generator.Initialize({}); - ModelBufferData model_buffer; - bool compile_flag = true; - EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), SUCCESS); + EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED); } TEST_F(UtestGeGenerator, test_check_aicore) { diff --git a/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc b/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc new file mode 100644 index 00000000..5157e510 --- /dev/null +++ b/tests/ut/ge/graph/passes/mark_node_unknown_shape_pass_unittest.cc @@ -0,0 +1,115 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#define private public +#include "graph/passes/mark_node_unknown_shape_pass.h" + +#include "common/ge_inner_error_codes.h" +#include "inc/pass_manager.h" +#include "graph/common/local_context.h" +#undef private + +namespace ge { +class UtestMarkNodeUnknownShapePass : public testing::Test { +protected: + void SetUp() {} + void TearDown() {} +public: + NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) { + GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT); + auto op_desc = std::make_shared(name, type); + for (auto i = 0; i < in_num; ++i) { + op_desc->AddInputDesc(test_desc); + } + for (auto i = 0; i < out_num; ++i) { + op_desc->AddOutputDesc(test_desc); + } + return graph->AddNode(op_desc); + } +/// netoutput1 +/// | +/// conv1 +/// \ / +/// data + void make_graph(const ComputeGraphPtr &graph) { + GetLocalOmgContext().fuzz_compile_flag = true; + auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D"); + { + auto data1 = MakeNode(graph, 1, 1, "data", "Data"); + GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT); + data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc); + data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0)); + GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1)); + } + + conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine"); + AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true); + auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput"); + GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0)); + } +}; + +TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) { + OpDescPtr op_desc = std::make_shared("Mul", MATMUL); + ComputeGraphPtr graph = std::make_shared("default"); + op_desc->SetOpKernelLibName("GE"); + graph->AddNode(op_desc); + PassManager pass; + pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); + EXPECT_EQ(pass.Run(graph), SUCCESS); +} + +TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) { + OpDescPtr op_desc = std::make_shared("Mul", MATMUL); + ComputeGraphPtr graph = std::make_shared("default"); + op_desc->SetOpKernelLibName("AIcoreEngine"); + graph->AddNode(op_desc); + GetLocalOmgContext().fuzz_compile_flag = true; + PassManager pass; + pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); + EXPECT_EQ(pass.Run(graph), SUCCESS); +} + +TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) { + ComputeGraphPtr graph = std::make_shared("test_graph"); + make_graph(graph); + PassManager pass; + pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass); + EXPECT_EQ(pass.Run(graph), SUCCESS); + EXPECT_EQ(graph->GetAllNodes().size(), 3); + for (const auto &node : graph->GetAllNodes()) { + if (node->GetName() == "conv1") { + auto op_desc = node->GetOpDesc(); + EXPECT_NE(op_desc, nullptr); + for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) { + auto input_desc = op_desc->MutableInputDesc(static_cast(i)); + EXPECT_TRUE(input_desc->GetShape().GetDim(0) == -2); + } + for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) { + EXPECT_NE(output_desc, nullptr); + EXPECT_TRUE(output_desc->GetShape().GetDim(0) == -2); + } + } + } +} + +} // namespace ge diff --git a/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc new file mode 100644 index 00000000..af60021c --- /dev/null +++ b/tests/ut/ge/graph/passes/reshape_recovery_pass_unittest.cc @@ -0,0 +1,69 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph/passes/reshape_recovery_pass.h" + +#include +#include +#include + +#include "graph_builder_utils.h" + +namespace ge { +class UtestReshapeRecoveryPass : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +namespace { +/// netoutput1 +/// | \ +///transdata1 \ +/// | \ +/// | transdata2 +/// | / +/// var1 const1 +ut::GraphBuilder Graph1Builder() { + ut::GraphBuilder builder = ut::GraphBuilder("g2"); + auto var1 = builder.AddNode("var1", "Variable", 0, 1, FORMAT_ND, DT_FLOAT, {-1}); + auto const1 = builder.AddNode("const1", "Const", 0, 1, FORMAT_ND, DT_FLOAT, {1, 1, 224, 224}); + auto transdata2 = builder.AddNode("transdata2", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); + auto transdata1 = builder.AddNode("transdata1", "Transdata", 1, 1, FORMAT_ND, DT_FLOAT, {224, 224}); + auto netoutput1 = builder.AddNode("netoutput1", "Netoutput", 2, 0); + + builder.AddDataEdge(var1, 0, transdata1, 0); + builder.AddDataEdge(const1, 0, transdata2, 0); + builder.AddDataEdge(transdata2, 0, netoutput1, 1); + builder.AddDataEdge(transdata1, 0, netoutput1, 0); + + return builder; +} +} // namespace + +TEST_F(UtestReshapeRecoveryPass, reshape_recovery_with_dynamic_shape) { + auto builder = Graph1Builder(); + auto graph = builder.GetGraph(); + ReshapeRecoveryPass reshape_recovery_pass; + EXPECT_EQ(graph->GetDirectNodesSize(),5); + Status ret = reshape_recovery_pass.Run(graph); + EXPECT_EQ(ret, SUCCESS); + EXPECT_EQ(graph->GetDirectNodesSize(),8); + + auto reshape1 = graph->FindNode("Reshape_ReshapeRecoveryPass_0"); + EXPECT_NE(reshape1, nullptr); +} +} // namespace ge diff --git a/tests/ut/ge/single_op/single_op_unittest.cc b/tests/ut/ge/single_op/single_op_unittest.cc new file mode 100644 index 00000000..8c2f6e51 --- /dev/null +++ b/tests/ut/ge/single_op/single_op_unittest.cc @@ -0,0 +1,163 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "runtime/rt.h" + +#define protected public +#define private public +#include "single_op/single_op.h" +#include "single_op/single_op_manager.h" +#undef private +#undef protected + +using namespace std; +using namespace ge; + +class UtestSingleOp : public testing::Test { + protected: + void SetUp() {} + void TearDown() {} +}; + +TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) { + uintptr_t resource_id = 0; + std::mutex stream_mu; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); + + vector dims_vec_0 = {2}; + vector input_desc; + GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); + // input data from device + AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0); + input_desc.emplace_back(tensor_desc_0); + + vector input_buffers; + ge::DataBuffer data_buffer; + data_buffer.data = new char[4]; + data_buffer.length = 4; + input_buffers.emplace_back(data_buffer); + + vector output_desc; + vector output_buffers; + + // UpdateRunInfo failed + EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID); +} + +TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) { + uintptr_t resource_id = 0; + std::mutex stream_mu; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream); + dynamic_single_op.num_inputs_ = 1; + + vector dims_vec_0 = {2}; + vector input_desc; + GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32); + // input data from host + AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1); + input_desc.emplace_back(tensor_desc_0); + + int64_t input_size = 0; + EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS); + EXPECT_EQ(input_size, 64); + EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr); + + vector input_buffers; + ge::DataBuffer data_buffer; + data_buffer.data = new char[4]; + data_buffer.length = 4; + input_buffers.emplace_back(data_buffer); + + vector output_desc; + vector output_buffers; + + auto *tbe_task = new (std::nothrow) TbeOpTask(); + ge::OpDescPtr op_desc = std::make_shared("Mul", MATMUL); + ge::ComputeGraphPtr graph = std::make_shared("default"); + ge::NodePtr node = graph->AddNode(op_desc); + tbe_task->node_ = node; + + dynamic_single_op.op_task_.reset((OpTask *)(tbe_task)); + + OpDescPtr desc_ptr = MakeShared("name1", "type1"); + EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS); + dynamic_single_op.op_task_->op_desc_ = desc_ptr; + // UpdateRunInfo failed + EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID); +} + + +TEST_F(UtestSingleOp, test_singleop_execute_async1) { + StreamResource *res = new (std::nothrow) StreamResource(1); + std::mutex stream_mu; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + SingleOp single_op(res, &stream_mu, stream); + + vector input_buffers; + ge::DataBuffer data_buffer; + data_buffer.data = new char[4]; + data_buffer.length = 4; + data_buffer.placement = 1; + input_buffers.emplace_back(data_buffer); + vector output_buffers; + + single_op.input_sizes_.emplace_back(4); + SingleOpModelParam model_params; + single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); + single_op.args_.resize(1); + EXPECT_EQ(single_op.hybrid_model_executor_, nullptr); + EXPECT_EQ(single_op.running_param_->mem_base, nullptr); + EXPECT_EQ(single_op.tasks_.size(), 0); + EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), SUCCESS); +} + +TEST_F(UtestSingleOp, test_singleop_execute_async2) { + StreamResource *res = new (std::nothrow) StreamResource(1); + std::mutex stream_mu; + rtStream_t stream = nullptr; + rtStreamCreate(&stream, 0); + SingleOp single_op(res, &stream_mu, stream); + + vector input_buffers; + ge::DataBuffer data_buffer; + data_buffer.data = new char[4]; + data_buffer.length = 4; + data_buffer.placement = 1; + input_buffers.emplace_back(data_buffer); + vector output_buffers; + + single_op.input_sizes_.emplace_back(4); + SingleOpModelParam model_params; + single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params)); + single_op.args_.resize(1); + + GeTensorDesc tensor_desc(GeShape({1}), FORMAT_NHWC, DT_UINT64); + single_op.inputs_desc_.emplace_back(tensor_desc); + std::shared_ptr root_model = ge::MakeShared(); + single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model)); + single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(), 0, stream)); + EXPECT_EQ(single_op.running_param_->mem_base, nullptr); + EXPECT_EQ(single_op.tasks_.size(), 0); + EXPECT_EQ(single_op.ExecuteAsync(input_buffers, output_buffers), PARAM_INVALID); +} \ No newline at end of file