!1459 ge code for fuzz build

From: @zhou_lili
Reviewed-by: @xchu42, @youui
Signed-off-by: @youui
pull/1459/MERGE
mindspore-ci-bot committed via Gitee, 4 years ago
commit f49140fcc7

@ -195,6 +195,7 @@ set(TRAIN_SRC_LIST
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/mark_node_unknown_shape_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/partition/dynamic_shape_partition.cc"
"graph/partition/stage_partition.cc"
@ -509,6 +510,7 @@ set(INFER_SRC_LIST
"graph/passes/atomic_addr_clean_pass.cc"
"graph/passes/mark_same_addr_pass.cc"
"graph/passes/mark_graph_unknown_status_pass.cc"
"graph/passes/mark_node_unknown_shape_pass.cc"
"graph/passes/mark_agnostic_pass.cc"
"graph/common/omg_util.cc"
"graph/common/bcast.cc"

@ -114,6 +114,7 @@ OMG_HOST_SRC_FILES := \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/passes/mark_node_unknown_shape_pass.cc \
graph/passes/mark_agnostic_pass.cc \
graph/common/omg_util.cc \
graph/common/bcast.cc \

@ -114,6 +114,7 @@ LIBGE_LOCAL_SRC_FILES := \
graph/passes/atomic_addr_clean_pass.cc \
graph/passes/mark_same_addr_pass.cc \
graph/passes/mark_graph_unknown_status_pass.cc \
graph/passes/mark_node_unknown_shape_pass.cc \
graph/passes/mark_agnostic_pass.cc \
graph/partition/dynamic_shape_partition.cc \
graph/partition/stage_partition.cc \

@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
const int64_t kDynamicDimValue = -2;
const int kDefaultDeviceId = 0;
const int kDefaultJobId = 0;
const int32_t kFuzzBuildPattern = 1;
std::map<ge::OpEngineType, std::string> engine_type_map{
{ge::ENGINE_SYS, kEngineNameDefault},
@ -296,13 +297,44 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso
return SUCCESS;
}
static Status GetFuzzBuildAttrs(const OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model,
GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) {
GELOGD("Start get fuzz build attrs of %s.", op_desc->GetName().c_str());
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str());
node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD);
}
(void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
if (!fuzz_build_attrs.empty()) {
GELOGD("%s has been split; get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str());
return SUCCESS;
} else {
GELOGW("%s was built with the fuzz build pattern, but ATTR_NAME_FUZZ_BUILD_RES_ATTRS is not set.", op_desc->GetName().c_str());
}
return SUCCESS;
}
static bool HasShapeRange(const vector<GeTensor> &inputs) {
for (const auto &input : inputs) {
vector<pair<int64_t, int64_t>> shape_range;
(void)input.GetTensorDesc().GetShapeRange(shape_range);
if (!shape_range.empty()) {
GELOGD("Has set shape range.");
return true;
}
}
return false;
}
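A minimal caller-side sketch (not part of this diff, assuming the public GeTensorDesc::SetShapeRange interface) of how an input acquires a shape range, which makes HasShapeRange() above return true and keeps fuzz compile disabled:

GeTensorDesc tensor_desc(GeShape({-1, 224}), FORMAT_ND, DT_FLOAT);
std::vector<std::pair<int64_t, int64_t>> shape_range = {{1, 32}, {224, 224}};  // dim 0 may vary in [1, 32]
(void)tensor_desc.SetShapeRange(shape_range);
GeTensor tensor(tensor_desc);
// HasShapeRange({tensor}) is now true, so BuildSingleOp leaves fuzz_compile_flag false.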
class GeGenerator::Impl {
public:
Impl(OmgContext &omg_context) : omg_context_(omg_context) {}
~Impl() = default;
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models);
Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model);
Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff);
@ -742,7 +774,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor>
Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline) {
bool is_offline, int32_t compile_flag) {
GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size());
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
if (!is_offline) {
@ -764,6 +797,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc);
GE_CHECK_NOTNULL(op_desc_tmp);
bool fuzz_compile_flag = false;
if (!HasShapeRange(inputs) && compile_flag == kFuzzBuildPattern) {
fuzz_compile_flag = true;
}
if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) {
GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str());
return FAILED;
}
impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag;
// 1. Create ComputeGraph.
string name = ge::CurrentTimeInStr() + "_" + model_file_name;
Graph graph;
@ -810,6 +853,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
} else if (fuzz_compile_flag) {
GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str());
(void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag);
GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs;
if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) {
GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str());
return FAILED;
}
if (!fuzz_build_attrs.empty()) {
GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs),
return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed.");
}
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
} else {
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
}
@ -825,15 +881,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
* @param [in] vector<GeTensor> &inputs: Operator input data description information.
* @param [in] vector<GeTensor> &outputs: Operator output data description information.
* @param [in] const string &model_file_name: Offline model filename.
* @param [in] compile_flag: op build flag from atc
* @return SUCCESS handle successfully / others handle failed
*/
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, const string &model_file_name) {
const vector<GeTensor> &outputs, const string &model_file_name,
int32_t compile_flag) {
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size());
ModelBufferData model_buff;
OpEngineType engine_type = ENGINE_SYS;
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag);
GELOGI("Finish build single offline model, status: %u", status);
return status;
}
@ -850,7 +908,6 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
* @return SUCCESS handle successfully / others handle failed
*/
// old process will be deleted
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type,
ModelBufferData &model_buff) {
@ -864,7 +921,12 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag,
ModelBufferData &model_buff) {
return SUCCESS;
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size());
Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false,
compile_flag);
GELOGI("Finish build single online model, status: %u", status);
return status;
}
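A hypothetical caller-side sketch (not from this PR) of the extended offline interface; the literal 1 mirrors kFuzzBuildPattern defined earlier in this file:

GeGenerator generator;
// assume generator.Initialize(options) has already succeeded
OpDescPtr op_desc = MakeShared<OpDesc>("my_add", "Add");  // hypothetical single op
std::vector<GeTensor> inputs;   // inputs without shape ranges
std::vector<GeTensor> outputs;
Status ret = generator.BuildSingleOpModel(op_desc, inputs, outputs, "./add_fuzz", 1);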
Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs,

@ -61,6 +61,7 @@
#include "graph/passes/iterator_op_pass.h"
#include "graph/passes/link_gen_mask_nodes_pass.h"
#include "graph/passes/mark_graph_unknown_status_pass.h"
#include "graph/passes/mark_node_unknown_shape_pass.h"
#include "graph/passes/merge_pass.h"
#include "graph/passes/merge_input_memcpy_pass.h"
#include "graph/passes/merge_to_stream_merge_pass.h"
@ -864,6 +865,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
}
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize);
// set fuzz compile flag after original graph optimization
GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed.");
ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());
@ -878,7 +881,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
options_.build_step == BUILD_STEP_AFTER_BUILDER ||
options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB));
if (run_after_optimize_subgraph) {
Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());
return ret;
@ -896,6 +899,22 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
return SUCCESS;
}
Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) {
if (!GetLocalOmgContext().fuzz_compile_flag) {
return SUCCESS;
}
for (const auto &node : compute_graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag);
if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) {
GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str());
return FAILED;
}
}
return SUCCESS;
}
Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass));
@ -2487,6 +2506,8 @@ Status GraphManager::OptimizeStage2(ge::ComputeGraphPtr &compute_graph) {
new (std::nothrow) VariableRefDeleteOpPass))
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass("OptimizeStage2::ControlAttrOptimize::CompileNodesPass",
new (std::nothrow) CompileNodesPass))
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass(
"OptimizeStage2::AfterMergePasses::MarkNodeUnknownShapePass", new(std::nothrow) MarkNodeUnknownShapePass))
GE_CHK_STATUS_RET(pass_for_control_attr_optimize.AddPass(
"OptimizeStage2::AfterMergePasses::MarkGraphUnknownStatusPass", new(std::nothrow) MarkGraphUnknownStatusPass))
GE_CHK_STATUS_RET(

@ -358,6 +358,7 @@ class GraphManager {
ComputeGraphPtr &compute_graph,
GeRootModelPtr &ge_root_model,
uint64_t session_id);
Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph);
Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph,
Graph2SubGraphInfoList &sub_graph_map,

@ -0,0 +1,99 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "graph/passes/mark_node_unknown_shape_pass.h"
#include "graph/utils/node_utils.h"
#include "graph/debug/ge_attr_define.h"
#include "graph/common/local_context.h"
namespace ge {
namespace {
const char *const kEngineNameAiCore = "AIcoreEngine";
const char *const kNeedRefreshShape = "_need_generate";
const char *const kOriginalNode = "_original_node";
const int32_t kDynamicState = -2;
}
Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) {
GE_CHECK_NOTNULL(graph);
if (!GetLocalOmgContext().fuzz_compile_flag) {
return SUCCESS;
}
if (IsAllAicoreSupportDyn(graph)) {
if (UpdateNodeShapeToUnknown(graph) != SUCCESS) {
GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown.");
return FAILED;
}
}
return SUCCESS;
}
bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) {
bool is_all_aicore_support_dyn = false;
for (const auto &node : graph->GetAllNodes()) {
if (node->GetOpDesc() == nullptr) {
continue;
}
if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) {
GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str());
continue;
}
NodePtr original_node = nullptr;
original_node = node->GetOpDesc()->TryGetExtAttr(kOriginalNode, original_node);
if ((original_node == nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) ||
(original_node != nullptr && AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS) &&
!AttrUtils::HasAttr(original_node->GetOpDesc(), kNeedRefreshShape))) {
GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
is_all_aicore_support_dyn = true;
} else {
GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
is_all_aicore_support_dyn = false;
break;
}
}
return is_all_aicore_support_dyn;
}
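// Note on the loop above: IsAllAicoreSupportDyn() returns true only when every
// AICore node carries ATTR_NAME_FUZZ_BUILD_RES_ATTRS (directly, or via its
// original node when the _need_generate mark is absent); a graph with no
// AICore nodes at all yields false, so its shapes are left untouched.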
Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) {
GELOGD("Need to update node shape to dynamic when get fuzz build result.");
for (const auto &node : graph->GetAllNodes()) {
if (NodeUtils::IsConst(*node) || node->GetType() == VARIABLE) {
continue;
}
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(i));
if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) {
continue;
}
GELOGD("Update input shape for %s.", node->GetName().c_str());
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (input_desc != nullptr) {
input_desc->SetShape(GeShape({kDynamicState}));
}
}
for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
if (output_desc != nullptr) {
GELOGD("Update output shape for %s.", node->GetName().c_str());
output_desc->SetShape(GeShape({kDynamicState}));
}
}
}
return SUCCESS;
}
} // namespace ge
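For reference, a short sketch (not in this PR) of the shape the pass writes: in GE a single dim of -2 (kDynamicState above) denotes an unknown-rank shape, which downstream passes treat as fully dynamic:

GeShape unknown_rank({-2});
// op_desc->MutableInputDesc(i)->SetShape(unknown_rank) is the call the pass issues.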

@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
#define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
#include "graph/graph.h"
#include "inc/graph_pass.h"
namespace ge {
class MarkNodeUnknownShapePass : public GraphPass {
public:
Status Run(ComputeGraphPtr graph);
private:
bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph);
Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph);
};
} // namespace ge
#endif // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_

@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) {
GE_CHECK_NOTNULL(dst_node->GetOpDesc());
auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx());
GE_CHECK_NOTNULL(dst_tensor);
bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
dst_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims();
bool is_dynamic = false;
const auto &src_tensor_dims = src_tensor->GetShape().GetDims();
const auto &dst_tensor_dims = dst_tensor->GetShape().GetDims();
if ((std::any_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val < 0; }))
|| (std::any_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val < 0; }))) {
GELOGD("No need to insert reshape node between %s and %s.", node->GetName().c_str(),
dst_node->GetName().c_str());
is_dynamic = true;
}
bool is_need_insert_reshape = (src_tensor_dims != dst_tensor_dims) && !is_dynamic;
if (is_need_insert_reshape) {
auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph());
GE_CHECK_NOTNULL(reshape);
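// A quick worked example of the new check (illustrative shapes): with src
// dims {8, -1} and dst dims {8, 16}, the -1 marks a dynamic dim, so
// is_dynamic becomes true and no Reshape is inserted; with static dims
// {8, 16} vs {16, 8}, a Reshape node is still created as before.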

@ -54,6 +54,7 @@ Status HybridModelExecutor::Execute(HybridModelExecutor::ExecuteArgs &args) {
"[%s] check input node shape by shape range failed.",
root_graph_item->GetName().c_str());
}
if (context_.global_step != nullptr) {
GE_CHK_RT_RET(rtMemcpyAsync(context_.global_step, sizeof(uint64_t), &context_.iteration,
sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE_EX, context_.stream));
@ -100,8 +101,10 @@ Status HybridModelExecutor::ExecuteGraphInternal(SubgraphExecutor &executor,
GE_CHK_STATUS_RET_NOLOG(prof_mgr.ProfileStepInfo(index_id, model_id, 1, stream_, device_id));
}
if (!model_->IsSingleOp()) {
HYBRID_CHK_STATUS_RET(executor.Synchronize(), "Failed to sync root graph.");
RECORD_MODEL_EXECUTION_EVENT(&context_, "[Synchronize] End");
}
args.outputs.clear();
HYBRID_CHK_STATUS_RET(executor.GetOutputs(args.outputs, args.output_desc), "Failed to get outputs");

@ -168,7 +168,7 @@ Status NodeItem::InitInputsAndOutputs() {
Status NodeItem::ResolveDynamicState() {
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
GELOGD("Node name is %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic),
"[%s] Failed to get shape status.",

@ -22,6 +22,7 @@
#include "hybrid/node_executor/aicore/aicore_task_builder.h"
#include "graph/load/model_manager/tbe_handle_store.h"
#include "graph/types.h"
#include "single_op/task/build_task_utils.h"
using optiling::OpRunInfo;
@ -31,6 +32,7 @@ namespace {
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
constexpr char const *kAttrOpParamSize = "op_para_size";
constexpr char const *kAttrAtomicOpParamSize = "atomic_op_para_size";
std::atomic<std::uint64_t> log_id(0);
} // namespace
TbeHandleHolder::TbeHandleHolder(void *bin_handle)
@ -48,6 +50,12 @@ bool TbeHandleRegistry::AddHandle(std::unique_ptr<TbeHandleHolder> &&holder) {
}
Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def) {
log_name_ = op_desc.GetName() + "_tvmbin";
log_id_ = log_id++;
auto op_desc_ptr = MakeShared<OpDesc>(op_desc);
GE_CHECK_NOTNULL(op_desc_ptr);
auto task_info = BuildTaskUtils::GetTaskInfo(op_desc_ptr);
GELOGI("[TASK_INFO] %lu/%s %s.", log_id_, log_name_.c_str(), task_info.c_str());
GE_CHK_STATUS_RET_NOLOG(InitWithTaskDef(op_desc, task_def));
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(op_desc));
@ -67,6 +75,7 @@ Status AiCoreOpTask::Init(const OpDesc &op_desc, const domi::TaskDef &task_def)
output_indices_to_skip_.push_back(i);
}
}
GELOGI("[TASK_INFO] %lu/%s.", log_id_, log_name_.c_str());
return SUCCESS;
}

@ -114,6 +114,8 @@ class AiCoreOpTask {
uint32_t tiling_key_ = 0;
void *handle_ = nullptr;
bool is_dynamic_ = false;
uint64_t log_id_ = 0;
std::string log_name_;
};
class AtomicAddrCleanOpTask : public AiCoreOpTask {

@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path");
DEFINE_string(display_model_info, "0", "Optional; display model info");
DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance."
"normal: no need to compile, used saved .o files directly;"
"high: need to recompile, high execute performance mode.");
class GFlagUtils {
public:
/**
@ -330,7 +334,8 @@ class GFlagUtils {
"Default value: $HOME/atc_data\n"
" --op_compiler_cache_mode Set the operator compilation cache mode."
"Options are disable(default), enable and force(force to refresh the cache)\n"
" --display_model_info enable for display model info; 0(default): close display, 1: open display");
" --display_model_info enable for display model info; 0(default): close display, 1: open display.\n"
" --performance_mode Set high performance mode of compile or execute.");
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
// Using gflags to analyze input parameters
@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) {
options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode);
options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path);
options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path);
options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode);
}
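For example, a hypothetical invocation exercising the new option could be: atc --singleop=op.json --output=./out --soc_version=Ascend310 --performance_mode=high (all flags except --performance_mode are pre-existing atc options).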
domi::Status GenerateSingleOp(const std::string& json_file_path) {
@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {
output_path = FLAGS_output + "/";
}
output_path += param.file_name;
ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path);
ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag);
if (ret != SUCCESS) {
DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index);
ret = domi::FAILED;
@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() {
options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path));
options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info));
options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode));
// set enable scope fusion passes
SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes);
// print atc option map

@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format";
constexpr char const *kFileSuffix = ".om";
constexpr char const *kKeyDynamicInput = "dynamic_input";
constexpr char const *kKeyDynamicOutput = "dynamic_output";
constexpr char const *kKeyCompileFlag = "compile_flag";
constexpr int kDumpJsonIndent = 2;
constexpr int kShapeRangePairSize = 2;
constexpr int kShapeRangeLow = 0;
@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) {
}
void from_json(const Json &j, SingleOpDesc &desc) {
auto op = j.find(kKeyOp);
if (op != j.end()) {
desc.op = j.at(kKeyOp).get<string>();
}
auto input_desc = j.find(kKeyInputDesc);
if (input_desc != j.end()) {
@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) {
if (attr_field != j.end()) {
desc.attrs = attr_field->get<vector<SingleOpAttr>>();
}
auto compile_flag = j.find(kKeyCompileFlag);
if (compile_flag != j.end()) {
desc.compile_flag = compile_flag->get<int32_t>();
}
}
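For illustration, a hypothetical single-op JSON (not taken from this PR; the "format"/"shape"/"type" field names are the usual atc single-op keys and are assumed here) that exercises the new key. As ParseSingleOpList below shows, an entry carrying "compile_flag": 1 acts as a list-wide switch and is itself skipped:

[
  { "compile_flag": 1 },
  {
    "op": "Add",
    "input_desc":  [ { "format": "ND", "shape": [16], "type": "float16" },
                     { "format": "ND", "shape": [16], "type": "float16" } ],
    "output_desc": [ { "format": "ND", "shape": [16], "type": "float16" } ]
  }
]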
Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) {
@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
return ret;
}
int32_t compile_flag = 0;
for (const Json &single_op_json : single_op_list_json) {
SingleOpDesc single_op_desc;
GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str());
single_op_desc = single_op_json;
GELOGD("Compile flag is %d.", single_op_desc.compile_flag);
if (single_op_desc.compile_flag == 1) {
compile_flag = single_op_desc.compile_flag;
continue;
}
if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) {
GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!");
REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param.");
@ -604,6 +619,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
if (ret != SUCCESS) {
return ret;
}
param.compile_flag = compile_flag;
op_list.emplace_back(param);
GELOGI("Parse the index[%d] of op success", index);

@ -55,6 +55,7 @@ struct SingleOpDesc {
std::vector<SingleOpTensorDesc> input_desc;
std::vector<SingleOpTensorDesc> output_desc;
std::vector<SingleOpAttr> attrs;
int32_t compile_flag = 0;
};
struct SingleOpBuildParam {
@ -62,6 +63,7 @@ struct SingleOpBuildParam {
std::vector<ge::GeTensor> inputs;
std::vector<ge::GeTensor> outputs;
std::string file_name;
int32_t compile_flag = 0;
};
void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc);

@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32;
const size_t kDataMemAlignUnit = 2;
const string kShapeTypeDynamic = "dynamic";
const string kShapeTypeStatic = "static";
const int64_t kHostMemType = 1;
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;
const uint32_t kAlignBytes = 512;
size_t GetAlignedSize(size_t size) {
size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
@ -65,6 +68,72 @@ Status ProfilingTaskInfo(OpTask *op_task, const string &shape_type) {
profiling_manager.ReportProfilingData(model_id, task_desc_info);
return SUCCESS;
}
Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs,
std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
int64_t total_size = 0;
size_t index = 0;
for (auto &input_buffer : inputs) {
int64_t input_size = 0;
if (input_buffer.placement == kHostMemType) {
GE_CHECK_LE(input_buffer.length, INT64_MAX);
input_size = input_buffer.length;
// pad input_size up to a multiple of 512 bytes (kAlignBytes)
GE_CHK_STATUS_RET(CheckInt64AddOverflow(input_size, (kAlignBytes - 1)), "Padded size exceeds INT64_MAX.");
input_size = ((input_size + kAlignBytes - 1) / kAlignBytes) * kAlignBytes;
inputs_size.emplace_back(index, input_size);
GE_CHK_STATUS_RET(CheckInt64AddOverflow(total_size, input_size), "Total size exceeds INT64_MAX.");
total_size += input_size;
GELOGD("Memory type of input %zu is host, tensor size is %ld.", index, input_size);
}
index++;
}
if (total_size > kFuzzDeviceBufferSize) {
GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
return FAILED;
}
return SUCCESS;
}
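As a quick check of the padding arithmetic above (illustrative sizes): a 100-byte host input pads to ((100 + 511) / 512) * 512 = 512 bytes, and a 513-byte one pads to 1024; it is these padded sizes, summed, that must stay within the 1 MiB kFuzzDeviceBufferSize.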
Status UpdateInputsBufferAddr(StreamResource *stream_resource, rtStream_t stream,
const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
std::vector<DataBuffer> &update_buffers) {
GE_CHECK_NOTNULL(stream_resource);
if (stream_resource->Init() != SUCCESS) {
GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer.");
return FAILED;
}
auto dst_addr = reinterpret_cast<uint8_t *>(stream_resource->GetDeviceBufferAddr());
// copy host mem from input_buffer to device mem of dst_addr
for (const auto &input_size : inputs_size) {
size_t index = input_size.first;
auto size = input_size.second;
GELOGD("Do H2D for %zu input, dst size is %zu, src length is %lu.", index, size, update_buffers[index].length);
GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length,
RT_MEMCPY_HOST_TO_DEVICE_EX, stream));
update_buffers[index].data = dst_addr;
dst_addr = reinterpret_cast<uint8_t *>(dst_addr + size);
}
return SUCCESS;
}
Status InitHybridModelArgs(const std::vector<DataBuffer> &input_buffers,
const std::vector<DataBuffer> &output_buffers,
const std::vector<GeTensorDesc> &inputs_desc,
hybrid::HybridModelExecutor::ExecuteArgs &args) {
for (auto &input : input_buffers) {
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
}
for (auto &output : output_buffers) {
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
}
for (auto &tensor_desc : inputs_desc) {
auto desc = MakeShared<GeTensorDesc>(tensor_desc);
GE_CHECK_NOTNULL(desc);
args.input_desc.emplace_back(desc);
}
return SUCCESS;
}
} // namespace
SingleOp::SingleOp(StreamResource *stream_resource, std::mutex *stream_mutex, rtStream_t stream)
@ -168,13 +237,28 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs,
const std::vector<DataBuffer> &outputs) {
GELOGD("Start SingleOp::ExecuteAsync.");
Status ret = ValidateArgs(inputs, outputs);
if (ret != SUCCESS) {
return ret;
}
GE_CHECK_NOTNULL(stream_resource_);
vector<pair<size_t, uint64_t>> inputs_size;
GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size));
std::lock_guard<std::mutex> lk(*stream_mutex_);
vector<DataBuffer> update_buffers = inputs;
if (!inputs_size.empty()) {
GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource_, stream_, inputs_size, update_buffers));
}
if (hybrid_model_executor_ != nullptr) {
GELOGD("Execute multi-task single op by hybrid model executor");
hybrid::HybridModelExecutor::ExecuteArgs args;
GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, outputs, inputs_desc_, args));
return hybrid_model_executor_->Execute(args);
}
auto current_mem_base = stream_resource_->GetMemoryBase();
if (running_param_->mem_base != current_mem_base) {
running_param_->mem_base = const_cast<uint8_t *>(current_mem_base);
@ -185,7 +269,7 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
task->GetOpdesc()->GetName().c_str());
}
}
ret = UpdateArgs(inputs, outputs);
ret = UpdateArgs(update_buffers, outputs);
if (ret != SUCCESS) {
return ret;
}
@ -252,33 +336,64 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc,
return SUCCESS;
}
Status DynamicSingleOp::SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
const vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &input_buffers) {
auto op_desc = op_task_->GetOpdesc();
GE_CHECK_NOTNULL(op_desc);
GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str());
for (const auto &input_size : inputs_size) {
size_t index = input_size.first;
auto ge_tensor_desc = input_desc.at(index);
// reconstruct GeTensor by DataBuffer
GeTensorPtr ge_tensor = MakeShared<GeTensor>(ge_tensor_desc);
GE_CHECK_NOTNULL(ge_tensor);
GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
index, ge_tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
return INTERNAL_ERROR;
}
auto tensor_desc = op_desc->MutableInputDesc(index);
GE_CHECK_NOTNULL(tensor_desc);
if (!AttrUtils::SetTensor(tensor_desc, ATTR_NAME_VALUE, ge_tensor)) {
GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str());
return FAILED;
}
}
return SUCCESS;
}
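// The host tensor is attached to the input desc as ATTR_NAME_VALUE,
// presumably so that value-dependent shape/tiling computation can read the
// actual input contents during LaunchKernel; the device-side copy in
// update_buffers remains what the kernel itself consumes.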
Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
const vector<DataBuffer> &input_buffers,
vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &output_buffers) {
GELOGD("Start DynamicSingleOp::ExecuteAsync.");
GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
vector<pair<size_t, uint64_t>> inputs_size;
GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_buffers, inputs_size));
vector<DataBuffer> update_buffers = input_buffers;
std::lock_guard<std::mutex> lk(*stream_mutex_);
if (!inputs_size.empty()) {
StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_);
GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(stream_resource, stream_, inputs_size, update_buffers));
}
if (hybrid_model_executor_ != nullptr) {
GELOGD("Execute multi-task dynamic single op by hybrid model executor");
hybrid::HybridModelExecutor::ExecuteArgs args;
for (auto &input : input_buffers) {
args.inputs.emplace_back(hybrid::TensorValue(input.data, input.length));
}
for (auto &output : output_buffers) {
args.outputs.emplace_back(hybrid::TensorValue(output.data, output.length));
}
for (auto &tensor_desc : input_desc) {
auto desc = MakeShared<GeTensorDesc>(tensor_desc);
GE_CHECK_NOTNULL(desc);
args.input_desc.emplace_back(desc);
}
GE_CHK_STATUS_RET_NOLOG(InitHybridModelArgs(update_buffers, output_buffers, input_desc, args));
return hybrid_model_executor_->Execute(args);
}
std::lock_guard<std::mutex> lk(*stream_mutex_);
GE_CHECK_NOTNULL(op_task_);
if (!inputs_size.empty()) {
GE_CHK_STATUS_RET_NOLOG(SetHostTensorValue(inputs_size, input_desc, input_buffers));
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_));
} else {
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
}
GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_));
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
return SUCCESS;

@ -59,6 +59,9 @@ class SingleOp {
std::vector<OpTask *> tasks_;
std::vector<std::vector<uintptr_t *>> arg_table_;
std::unique_ptr<SingleOpModelParam> running_param_;
std::unique_ptr<hybrid::HybridModel> hybrid_model_;
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
std::vector<GeTensorDesc> inputs_desc_;
};
class DynamicSingleOp {
@ -76,7 +79,8 @@ class DynamicSingleOp {
const std::vector<DataBuffer> &inputs,
std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs) const;
Status SetHostTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers);
std::unique_ptr<OpTask> op_task_;
std::unique_ptr<hybrid::HybridModel> hybrid_model_;
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
@ -85,6 +89,7 @@ class DynamicSingleOp {
rtStream_t stream_ = nullptr;
size_t num_inputs_ = 0;
size_t num_outputs_ = 0;
ComputeGraphPtr compute_graph_;
};
} // namespace ge
#endif // GE_SINGLE_OP_SINGLE_OP_H_

@ -43,6 +43,8 @@ using std::vector;
namespace ge {
namespace {
const size_t kDataOutputNum = 1;
const uint32_t kOutputIndexOfData = 0;
constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
auto comp_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
@ -51,7 +53,9 @@ Status IfInferDepend(GeModelPtr &ge_model, bool &flag) {
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
const auto &depends = op_desc->GetOpInferDepends();
if (!depends.empty()) {
bool support_dynamic_shape = false;
(void)AttrUtils::GetBool(op_desc, kAttrSupportDynamicShape, support_dynamic_shape);
if (!depends.empty() && support_dynamic_shape) {
flag = true;
return SUCCESS;
}
@ -462,6 +466,31 @@ Status SingleOpModel::BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTa
*task = aicpucc_task.release();
return SUCCESS;
}
Status SingleOpModel::InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model,
SingleOp &single_op) {
for (const auto &op_desc : data_ops_) {
auto output_tensor_desc = op_desc->GetOutputDesc(kOutputIndexOfData);
GeTensorDesc tensor_desc(output_tensor_desc);
single_op.inputs_desc_.emplace_back(tensor_desc);
GELOGD("Init inputs desc from %s.", op_desc->GetName().c_str());
}
GE_CHK_STATUS_RET_NOLOG(hybrid::NodeExecutorManager::GetInstance().EnsureInitialized());
auto root_model = model_helper_.GetGeRootModel();
GE_CHECK_NOTNULL(root_model);
root_model->SetRootGraph(GraphUtils::GetComputeGraph(ge_model->GetGraph()));
root_model->SetSubgraphInstanceNameToModel(root_model->GetRootGraph()->GetName(), ge_model);
single_op.hybrid_model_.reset(new (std::nothrow)hybrid::HybridModel(root_model));
GE_CHECK_NOTNULL(single_op.hybrid_model_);
GE_CHK_STATUS_RET(single_op.hybrid_model_->Init(true), "[Init][HybridModel]Failed.");
int32_t device_id = 0;
GE_CHK_RT_RET(rtGetDevice(&device_id));
single_op.hybrid_model_executor_.reset(new (std::nothrow)hybrid::HybridModelExecutor(single_op.hybrid_model_.get(),
device_id,
resource.GetStream()));
GE_CHECK_NOTNULL(single_op.hybrid_model_executor_);
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
return SUCCESS;
}
Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
GE_CHK_STATUS_RET_NOLOG(ParseInputsAndOutputs());
@ -469,10 +498,20 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
single_op.running_param_.reset(new (std::nothrow)SingleOpModelParam(model_params_));
GE_CHECK_NOTNULL(single_op.running_param_);
GE_CHK_STATUS_RET_NOLOG(SetInputsAndOutputs(single_op));
auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model);
bool infer_depend_flag = false;
GE_CHK_STATUS_RET(IfInferDepend(ge_model, infer_depend_flag), "[Check][InferDepend] failed.");
if (infer_depend_flag) {
// construct single_op and execute it with HybridModelExecutor
GELOGD("Init hybrid model params of single op; will execute with the hybrid model executor.");
return InitHybridModelExecutor(resource, ge_model, single_op);
}
return BuildTaskList(&resource, single_op);
}
Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def,
DynamicSingleOp &single_op) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
@ -483,6 +522,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_);
if (tbe_task->tiling_buffer_ != nullptr) {
GELOGD("tiling buffer is not nullptr.");
tbe_task->stream_resource_ = stream_resource;
}
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
@ -504,10 +547,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
return SUCCESS;
}
Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) {
auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model);
auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
GE_CHECK_NOTNULL(compute_graph);
single_op.compute_graph_ = compute_graph;
auto tasks = ge_model->GetModelTaskDefPtr()->task();
for (int i = 0; i < tasks.size(); ++i) {
const TaskDef &task_def = tasks[i];
@ -521,7 +567,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
"BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
}
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op));
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op));
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks.");
@ -561,6 +607,7 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
single_op.num_outputs_ = netoutput_op_->GetAllInputsSize();
GE_CHK_STATUS_RET_NOLOG(InitModelMem(resource));
model_params_.memory_size = UINT_MAX;
model_params_.graph_is_dynamic = true;
auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model);
@ -585,6 +632,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
return SUCCESS;
}
return BuildTaskListForDynamicOp(single_op);
return BuildTaskListForDynamicOp(&resource, single_op);
}
} // namespace ge

@ -40,6 +40,7 @@ struct SingleOpModelParam {
std::map<uintptr_t, int> addr_mapping_;
int64_t core_type = 0;
bool graph_is_dynamic = false;
};
class SingleOpModel {
@ -65,15 +66,17 @@ class SingleOpModel {
void ParseOutputNode(const OpDescPtr &op_desc);
Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);
Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op);
Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def,
DynamicSingleOp &single_op);
static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param);
void ParseArgTable(OpTask *task, SingleOp &op);
Status InitHybridModelExecutor(const StreamResource &resource, const GeModelPtr &ge_model, SingleOp &single_op);
std::string model_name_;
uint32_t model_id_ = 0;

@ -22,6 +22,11 @@
#include "single_op/single_op_model.h"
namespace ge {
namespace {
// limit the available device buffer size to 1 MiB
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;
}
StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {
}
@ -39,6 +44,17 @@ StreamResource::~StreamResource() {
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
}
}
if (device_buffer_ != nullptr) {
auto rt_ret = rtFree(device_buffer_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
}
}
Status StreamResource::Init() {
if (device_buffer_ != nullptr) {
return SUCCESS;  // already initialized; avoid leaking a previously allocated buffer
}
GE_CHK_RT_RET(rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM));
return SUCCESS;
}
SingleOp *StreamResource::GetOperator(const uint64_t key) {

@ -40,6 +40,7 @@ class StreamResource {
rtStream_t GetStream() const;
void SetStream(rtStream_t stream);
Status Init();
SingleOp *GetOperator(const uint64_t key);
DynamicSingleOp *GetDynamicOperator(const uint64_t key);
@ -49,6 +50,9 @@ class StreamResource {
uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true);
uint8_t *MallocWeight(const std::string &purpose, size_t size);
const uint8_t *GetMemoryBase() const;
void *GetDeviceBufferAddr() const {
return device_buffer_;
}
private:
uint8_t *DoMallocMemory(const std::string &purpose,
@ -65,6 +69,7 @@ class StreamResource {
rtStream_t stream_ = nullptr;
std::mutex mu_;
std::mutex stream_mu_;
void *device_buffer_ = nullptr;
};
} // namespace ge

@ -137,7 +137,7 @@ Status OpTask::GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id
return SUCCESS;
}
Status OpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
Status OpTask::UpdateRunInfo() {
return UNSUPPORTED;
}
@ -200,14 +200,14 @@ void TbeOpTask::SetHandle(void *handle) {
Status TbeOpTask::LaunchKernel(rtStream_t stream) {
GELOGD("To invoke rtKernelLaunch. task = %s, block_dim = %u", this->stub_name_.c_str(), block_dim_);
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
auto ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_), sm_desc, stream);
auto ret = DoLaunchKernel(stream);
int retry_times = 0;
while (ret != RT_ERROR_NONE && retry_times < kLaunchRetryTimes) {
retry_times++;
GELOGW("Retry after %d ms, retry_times: %d", kSleepTime, retry_times);
std::this_thread::sleep_for(std::chrono::milliseconds(kSleepTime));
ret = rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, sm_desc, stream);
ret = DoLaunchKernel(stream);
}
if (ret != RT_ERROR_NONE) {
@ -220,8 +220,7 @@ Status TbeOpTask::LaunchKernel(rtStream_t stream) {
return SUCCESS;
}
Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const vector<GeTensorDesc> &output_desc) {
GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc));
Status TbeOpTask::UpdateRunInfo() {
// invoke OpParaCalculate
GELOGD("Start to invoke OpParaCalculate.");
optiling::OpRunInfo run_info;
@ -235,10 +234,9 @@ Status TbeOpTask::UpdateRunInfo(const vector<GeTensorDesc> &input_desc, const ve
block_dim_ = run_info.block_dim;
tiling_data_ = run_info.tiling_data.str();
tiling_key_ = run_info.tiling_key;
run_info_workspaces_ = run_info.workspaces;
GELOGD("Done invoking OpParaCalculate successfully. block_dim = %u, tiling size = %zu, tiling_key = %u", block_dim_,
tiling_data_.size(), tiling_key_);
GE_CHK_STATUS_RET(AllocateWorkspaces(run_info.workspaces), "[Allocate][Workspaces] failed.");
return SUCCESS;
}
@ -288,14 +286,33 @@ Status TbeOpTask::UpdateNodeByShape(const vector<GeTensorDesc> &input_desc, cons
return SUCCESS;
}
void TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size) {
Status TbeOpTask::EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size) {
if (tiling_buffer != nullptr) {
uintptr_t *arg_base = nullptr;
size_t arg_num = 0;
GetIoAddr(arg_base, arg_num);
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
uint32_t inputs_num = node->GetOpDesc()->GetInputsSize();
uint32_t outputs_num = node->GetOpDesc()->GetOutputsSize();
uint32_t workspace_nums = node->GetOpDesc()->GetWorkspace().size();
uint32_t tiling_index = inputs_num + outputs_num + workspace_nums;
if (arg_num <= tiling_index) {  // writing arg_base[tiling_index] requires arg_num > tiling_index
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Check][Size]Tiling index %u, arg number %zu is invalid.",
tiling_index, arg_num);
return ACL_ERROR_GE_INTERNAL_ERROR;
}
arg_base[tiling_index] = reinterpret_cast<uintptr_t>(tiling_buffer);
}
node_ = node;
tiling_buffer_ = tiling_buffer;
max_tiling_size_ = max_tiling_size;
return SUCCESS;
}
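// Worked example for the bounds check above (illustrative counts): a node
// with 2 inputs, 1 output and 1 workspace places the tiling pointer at
// arg_base[4], so arg_num must be at least 5 for the write to stay in bounds.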
Status TbeOpTask::AllocateWorkspaces(const vector<int64_t> &workspace_sizes) {
static const std::string kPurpose("malloc workspace memory for dynamic op.");
workspaces_.clear();
if (workspace_sizes.empty()) {
GELOGD("No need to allocate workspace.");
return SUCCESS;
@ -333,8 +350,10 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &output_buffers,
rtStream_t stream) {
GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc));
GELOGD("[%s] Start to launch kernel", node_->GetName().c_str());
GE_CHK_STATUS_RET_NOLOG(UpdateNodeByShape(input_desc, output_desc));
GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo());
GE_CHK_STATUS_RET(AllocateWorkspaces(run_info_workspaces_), "[Allocate][Workspaces] failed.");
std::vector<void *> args;
for (auto &buffer : input_buffers) {
args.emplace_back(buffer.data);
@ -354,6 +373,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
args.emplace_back(tiling_buffer_);
}
GELOGD("Dst size is %zu, src size is %zu.", arg_size_, args.size() * sizeof(void *));
// For nodes with workspaces, workspace sizes are unknown at build time, so arg_size_ may need to be enlarged at execute time
if (arg_size_ < (args.size() * sizeof(void *))) {
size_t temp_size = args.size() * sizeof(void *);
GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size);
args_.reset(new(std::nothrow) uint8_t[temp_size]());
GE_CHECK_NOTNULL(args_);
arg_size_ = temp_size;
}
if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str());
REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str());
@ -361,17 +389,22 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
}
GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
GE_CHK_STATUS_RET(DoLaunchKernel(stream), "Failed to do launch kernel.");
return SUCCESS;
}
Status TbeOpTask::DoLaunchKernel(rtStream_t stream) {
auto *sm_desc = reinterpret_cast<rtSmDesc_t *>(sm_desc_);
if (handle_ == nullptr) {
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_),
sm_desc, stream));
} else {
std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
stream, kernel_info.c_str()));
GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(),
static_cast<uint32_t>(arg_size_), sm_desc, stream, kernel_info.c_str()));
}
return SUCCESS;
}

@ -30,6 +30,7 @@
#include "cce/aicpu_engine_struct.h"
#include "hybrid/node_executor/aicpu/aicpu_ext_info.h"
#include "init/gelib.h"
#include "register/op_tiling.h"
namespace ge {
class StreamResource;
@ -39,8 +40,7 @@ class OpTask {
OpTask() = default;
virtual ~OpTask() = default;
virtual Status LaunchKernel(rtStream_t stream) = 0;
virtual Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc);
virtual Status UpdateRunInfo();
virtual Status UpdateArgTable(const SingleOpModelParam &param);
void SetModelArgs(std::string model_name, uint32_t model_id);
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
@ -81,22 +81,23 @@ class TbeOpTask : public OpTask {
void SetKernelWithHandleArgs(std::unique_ptr<uint8_t[]> &&args, size_t arg_size, uint32_t block_dim,
const OpDescPtr &op_desc, const domi::KernelDefWithHandle& kernel_def_with_handle);
Status UpdateRunInfo(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc) override;
Status UpdateRunInfo() override;
const void *GetArgs() const;
size_t GetArgSize() const;
const std::string &GetStubName() const;
void EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, size_t max_tiling_size);
Status EnableDynamicSupport(const NodePtr &node, void *tiling_buffer, uint32_t max_tiling_size);
const std::string &GetTaskType() const override;
void SetHandle(void *handle);
private:
friend class SingleOpModel;
friend class TbeTaskBuilder;
static Status UpdateTensorDesc(const GeTensorDesc &src_tensor, GeTensorDesc &dst_tensor);
Status UpdateNodeByShape(const vector<GeTensorDesc> &input_desc,
const vector<GeTensorDesc> &output_desc);
Status AllocateWorkspaces(const std::vector<int64_t> &workspace_sizes);
Status DoLaunchKernel(rtStream_t stream);
const void *stub_func_ = nullptr;
std::unique_ptr<uint8_t[]> args_;
@ -108,6 +109,7 @@ class TbeOpTask : public OpTask {
void *tiling_buffer_ = nullptr;
uint32_t max_tiling_size_ = 0;
std::string tiling_data_;
std::vector<int64_t> run_info_workspaces_;
std::vector<void *> workspaces_;
NodePtr node_;

@ -308,92 +308,65 @@ Status TbeTaskBuilder::GetSmDesc(void **sm_desc, const SingleOpModelParam &param
}
Status TbeTaskBuilder::SetKernelArgs(TbeOpTask &task, const SingleOpModelParam &param, const OpDescPtr &op_desc) {
size_t arg_size = kernel_def_.args_size();
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
bool is_task_all_kernel = (task_type == RT_MODEL_TASK_ALL_KERNEL);
size_t arg_size = 0;
std::unique_ptr<uint8_t[]> args = nullptr;
if (is_task_all_kernel) {
GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_ALL_KERNEL.", op_desc->GetName().c_str());
arg_size = kernel_def_with_handle_.args_size();
args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy failed, size = %zu, ret = %d",
arg_size, static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size,
RT_MEMCPY_HOST_TO_HOST))
} else {
GELOGD("SetKernelArgs of %s in branch of RT_MODEL_TASK_KERNEL.", op_desc->GetName().c_str());
arg_size = kernel_def_.args_size();
args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);
GE_CHK_RT_RET(rtMemcpy(args.get(), arg_size, kernel_def_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST))
}
const domi::KernelContext &context = kernel_def_.context();
const domi::KernelContext &context = task_type == RT_MODEL_TASK_ALL_KERNEL ?
kernel_def_with_handle_.context() : kernel_def_.context();
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint16_t offset = *args_offset_tmp;
bool is_dynamic = false;
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
if (is_dynamic) {
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
} else {
// copy args
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret));
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);
return SUCCESS;
}
Status TbeTaskBuilder::SetKernelWithHandleArgs(TbeOpTask &task, const SingleOpModelParam &param,
const OpDescPtr &op_desc) {
size_t arg_size = kernel_def_with_handle_.args_size();
auto args = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[arg_size]);
GE_CHECK_NOTNULL(args);
GE_CHK_RT_RET(rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST));
auto rt_ret = rtMemcpy(args.get(), arg_size, kernel_def_with_handle_.args().data(), arg_size, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "[Update][Kernel_def:args]rtMemcpy failed, size = %zu, ret = %d",
arg_size, static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, size = %zu, ret = %d", arg_size, static_cast<int>(rt_ret));
return rt_ret;
if (is_task_all_kernel) {
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc,
kernel_def_with_handle_);
} else {
task.SetKernelArgs(std::move(args), arg_size, kernel_def_.block_dim(), op_desc);
}
const domi::KernelContext &context = kernel_def_with_handle_.context();
const auto *args_offset_tmp = reinterpret_cast<const uint16_t *>(context.args_offset().data());
uint16_t offset = *args_offset_tmp;
bool is_dynamic = false;
(void)AttrUtils::GetBool(op_desc_, kAttrSupportDynamicShape, is_dynamic);
if (is_dynamic) {
GE_CHK_STATUS_RET_NOLOG(InitTilingInfo(task));
} else {
// copy args
std::vector<void *> tensor_device_addr_vec = BuildTaskUtils::GetKernelArgs(op_desc_, param);
void *src_addr = reinterpret_cast<void *>(tensor_device_addr_vec.data());
uint64_t src_len = sizeof(void *) * tensor_device_addr_vec.size();
rt_ret = rtMemcpy(args.get() + offset, arg_size - offset, src_addr, src_len, RT_MEMCPY_HOST_TO_HOST);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "[Update][Kernel_def:args] rtMemcpy addresses failed, ret = %d", static_cast<int>(rt_ret));
REPORT_INNER_ERROR("E19999", "rtMemcpy failed, ret = %d", static_cast<int>(rt_ret));
return rt_ret;
if (!param.graph_is_dynamic && task.tiling_buffer_ != nullptr) {
GELOGD("Need to update run info when graph is static with dynamic node: %s.", op_desc->GetName().c_str());
task.UpdateRunInfo();
GE_CHK_RT_RET(rtMemcpy(task.tiling_buffer_, task.max_tiling_size_, task.tiling_data_.data(),
task.tiling_data_.size(), RT_MEMCPY_HOST_TO_DEVICE));
}
}
task.SetKernelWithHandleArgs(std::move(args), arg_size, kernel_def_with_handle_.block_dim(), op_desc,
kernel_def_with_handle_);
return SUCCESS;
}
Status TbeTaskBuilder::BuildTask(TbeOpTask &task, const SingleOpModelParam &param) {
GELOGD("Build tbe task begin");
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
auto ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? SetKernelWithHandleArgs(task, param, op_desc_) :
SetKernelArgs(task, param, op_desc_);
auto ret = SetKernelArgs(task, param, op_desc_);
if (ret != SUCCESS) {
return ret;
}
auto task_type = static_cast<rtModelTaskType_t>(task_def_.type());
ret = task_type == RT_MODEL_TASK_ALL_KERNEL ? RegisterKernelWithHandle(task, param) :
RegisterKernel(task, param);
task.SetHandle(handle_);
@ -437,7 +410,7 @@ Status TbeTaskBuilder::InitTilingInfo(TbeOpTask &task) {
GELOGD("[%s] Done allocating tiling buffer, size=%ld.", op_desc_->GetName().c_str(), max_size);
}
task.EnableDynamicSupport(node_, tiling_buffer, static_cast<size_t>(max_size));
task.EnableDynamicSupport(node_, tiling_buffer, static_cast<uint32_t>(max_size));
return SUCCESS;
}
} // namespace ge
