Pre-merge pull request !1418 from 周莉莉/master

pull/1418/MERGE
Author: 周莉莉, committed by Gitee
commit e4f9335560

@@ -53,6 +53,7 @@ constexpr char const *kAttrSupportDynamicShape = "support_dynamicshape";
const int64_t kDynamicDimValue = -2;
const int kDefaultDeviceId = 0;
const int kDefaultJobId = 0;
const int32_t kFuzzBuildPattern = 1;
std::map<ge::OpEngineType, std::string> engine_type_map{
{ge::ENGINE_SYS, kEngineNameDefault},
@@ -296,13 +297,60 @@ static Status ResetTensorVecShape(const vector<GeTensor> &inputs, vector<GeTenso
return SUCCESS;
}
static Status GetFuzzBuildAttrs(OpDescPtr &op_desc, const GeRootModelPtr &ge_root_model,
GeAttrValue::LIST_NAMED_ATTRS &fuzz_build_attrs) {
GELOGD("Start to get fuzz build attrs of %s.", op_desc->GetName().c_str());
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
GE_CHECK_NOTNULL(node);
GE_CHECK_NOTNULL(node->GetOpDesc());
GELOGD("Delete fuzz build attr of %s after build.", node->GetName().c_str());
node->GetOpDesc()->DelAttr(ATTR_NAME_FUZZ_BUILD);
}
(void)AttrUtils::GetListNamedAttrs(op_desc, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
if (!fuzz_build_attrs.empty()) {
GELOGD("%s has been split; get ATTR_NAME_FUZZ_BUILD_RES_ATTRS directly.", op_desc->GetName().c_str());
return SUCCESS;
}
bool all_aicore_support_dyn = false;
for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
if (node->GetOpDesc()->GetOpKernelLibName() != kAIcoreEngine) {
continue;
}
if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) {
all_aicore_support_dyn = true;
} else {
all_aicore_support_dyn = false;
GELOGD("%s kernel type is %s, but ATTR_NAME_FUZZ_BUILD_RES_ATTRS is not set.", node->GetName().c_str(),
node->GetOpDesc()->GetOpKernelLibName().c_str());
break;
}
}
if (all_aicore_support_dyn) {
GELOGD("All aicore nodes from %s support dynamic shape.", ge_root_model->GetRootGraph()->GetName().c_str());
for (const auto &node : ge_root_model->GetRootGraph()->GetAllNodes()) {
if (node->GetName() == op_desc->GetName()) {
(void)AttrUtils::GetListNamedAttrs(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs);
if (fuzz_build_attrs.empty()) {
GELOGE(FAILED, "[Get][ATTR_NAME_FUZZ_BUILD_RES_ATTRS] %s should set fuzz ret.", op_desc->GetName().c_str());
return FAILED;
}
}
}
}
if (fuzz_build_attrs.empty()) {
GELOGW("%s is built with fuzz build pattern, but ATTR_NAME_FUZZ_BUILD_RES_ATTRS is not set.", op_desc->GetName().c_str());
}
return SUCCESS;
}
class GeGenerator::Impl {
public:
Impl(OmgContext &omg_context) : omg_context_(omg_context) {}
~Impl() = default;
Status BuildModel(const Graph &graph, const vector<GeTensor> &inputs, GeRootModelPtr &ge_models);
bool HasSetShapeRange(const vector<GeTensor> &inputs);
Status SaveModel(const string &file_name_prefix, GeModelPtr &models, ModelBufferData &model);
Status SaveRootModel(const string &file_name_prefix, GeRootModelPtr &model, ModelBufferData &model_buff);
@@ -742,7 +790,8 @@ Status GeGenerator::CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor>
Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline) {
bool is_offline, int32_t compile_flag) {
GELOGD("Inputs size is %zu, outputs size is %zu.", inputs.size(), outputs.size());
GE_CHECK_NOTNULL_EXEC(impl_, return PARAM_INVALID);
impl_->is_offline_ = is_offline;
if (!is_offline) {
@@ -764,6 +813,16 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
OpDescPtr op_desc_tmp = AttrUtils::CloneOpDesc(op_desc);
GE_CHECK_NOTNULL(op_desc_tmp);
bool fuzz_compile_flag = false;
if (!(impl_->HasSetShapeRange(inputs)) && (compile_flag == kFuzzBuildPattern)) {
fuzz_compile_flag = true;
}
if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, fuzz_compile_flag)) {
GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD] Failed to set attr for %s.", op_desc->GetName().c_str());
return FAILED;
}
impl_->omg_context_.fuzz_compile_flag = fuzz_compile_flag;
// 1. Create ComputeGraph.
string name = ge::CurrentTimeInStr() + "_" + model_file_name;
Graph graph;
@@ -810,6 +869,19 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
GE_CHK_STATUS_RET_NOLOG(ResetTensorVecShape(outputs, outputs_dynamic));
GE_CHK_STATUS_RET_NOLOG(
impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs_dynamic, outputs_dynamic));
} else if (fuzz_compile_flag) {
GELOGD("Get fuzz build result of %s.", op_desc->GetName().c_str());
(void)AttrUtils::SetInt(ge_model, ATTR_NAME_BUILD_MODE, fuzz_compile_flag);
GeAttrValue::LIST_NAMED_ATTRS fuzz_build_attrs;
if (GetFuzzBuildAttrs(op_desc, ge_root_model, fuzz_build_attrs) != SUCCESS) {
GELOGE(FAILED, "[Get][FuzzRet]Failed to get fuzz build result of %s.", op_desc->GetName().c_str());
return FAILED;
}
if (!fuzz_build_attrs.empty()) {
GE_CHK_BOOL_EXEC(AttrUtils::SetListNamedAttrs(ge_model, ATTR_NAME_FUZZ_BUILD_RES_ATTRS, fuzz_build_attrs),
return FAILED, "Set ATTR_NAME_FUZZ_BUILD_RES_ATTRS failed.");
}
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
} else {
GE_CHK_STATUS_RET_NOLOG(impl_->SaveParams(ge_model, op_desc_tmp->GetType(), op_attrs, inputs, outputs));
}
@@ -825,15 +897,17 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
* @param [in] vector<GeTensor> &inputs: Operator input data description information.
* @param [in] vector<GeTensor> &outputs: Operator output data description information.
* @param [in] const string &model_file_name: Offline model filename.
* @param [in] compile_flag: op build flag from atc
* @return SUCCESS handle successfully / others handle failed
*/
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, const string &model_file_name) {
const vector<GeTensor> &outputs, const string &model_file_name,
int32_t compile_flag) {
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
GELOGI("Start to build single op offline model, input size: %zu, output size: %zu", inputs.size(), outputs.size());
ModelBufferData model_buff;
OpEngineType engine_type = ENGINE_SYS;
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true);
Status status = BuildSingleOp(op_desc, inputs, outputs, model_file_name, engine_type, model_buff, true, compile_flag);
GELOGI("Finish build single offline model, status: %u", status);
return status;
}
@@ -850,23 +924,17 @@ Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor
* @return SUCCESS handle successfully / others handle failed
*/
// old process will be deleted
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type,
const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag,
ModelBufferData &model_buff) {
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kOther);
GELOGI("Start to build single op online, input size: %zu, output size: %zu", inputs.size(), outputs.size());
Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false);
Status status = BuildSingleOp(op_desc, inputs, outputs, kFileNameSuffix, engine_type, model_buff, false,
compile_flag);
GELOGI("Finish build single online model, status: %u", status);
return status;
}
Status GeGenerator::BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, OpEngineType engine_type, int32_t compile_flag,
ModelBufferData &model_buff) {
return SUCCESS;
}
Status GeGenerator::BuildSingleOpGraph(OpDescPtr &op_desc, const vector<GeTensor> &inputs,
const vector<GeTensor> &outputs, std::string graph_name, Graph &graph) {
ge::ComputeGraphPtr compute_graph = MakeShared<ComputeGraph>(graph_name);
@@ -976,6 +1044,18 @@ Status GeGenerator::Impl::SaveRootModel(const string &file_name_prefix, GeRootMo
return SUCCESS;
}
bool GeGenerator::Impl::HasSetShapeRange(const vector<GeTensor> &inputs) {
for (const auto &input : inputs) {
vector<pair<int64_t, int64_t>> shape_range;
(void)input.GetTensorDesc().GetShapeRange(shape_range);
if (!shape_range.empty()) {
GELOGD("Has set shape range.");
return true;
}
}
return false;
}
Status GeGenerator::Impl::BuildModel(const Graph &graph, const vector<GeTensor> &inputs,
GeRootModelPtr &ge_root_model) {
static std::atomic<GraphId> atomic_graph_id(0);

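The heart of the ge_generator.cc change: fuzz compilation is enabled only when the caller passes compile_flag == kFuzzBuildPattern (1) and no input tensor carries an explicit shape range. A minimal standalone sketch of that decision, using simplified stand-in types rather than the GE API (ShapeRange and the free functions below are illustrative, not GE names):

#include <cstdint>
#include <utility>
#include <vector>

// Stand-in for the shape range attached to a GeTensorDesc.
using ShapeRange = std::vector<std::pair<int64_t, int64_t>>;

constexpr int32_t kFuzzBuildPattern = 1;  // same constant the diff introduces

// Mirrors GeGenerator::Impl::HasSetShapeRange: any non-empty range counts as set.
bool HasSetShapeRange(const std::vector<ShapeRange> &input_ranges) {
  for (const auto &range : input_ranges) {
    if (!range.empty()) {
      return true;
    }
  }
  return false;
}

// Mirrors the fuzz_compile_flag computation in BuildSingleOp.
bool ShouldFuzzCompile(const std::vector<ShapeRange> &input_ranges, int32_t compile_flag) {
  return !HasSetShapeRange(input_ranges) && (compile_flag == kFuzzBuildPattern);
}
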
@@ -863,6 +863,8 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
}
ErrorManager::GetInstance().SetStage(ErrorMessage::kModelCompile, ErrorMessage::kPrepareOptimize);
// set fuzz compile flag after origin graph optimize
GE_CHK_STATUS_RET(SetFuzzCompileFlag(compute_graph), "Set fuzz compile flag failed.");
ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());
@@ -877,7 +879,7 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
options_.build_step == BUILD_STEP_AFTER_BUILDER ||
options_.build_step == BUILD_STEP_AFTER_BUILDER_SUB));
if (run_after_optimize_subgraph) {
Status ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
ret = PreRunAfterOptimizeSubGraph(graph_node, compute_graph, ge_root_model, session_id);
if (ret != SUCCESS) {
GELOGE(ret, "Run PreRunAfterOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str());
return ret;
@@ -895,6 +897,19 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vector<Ge
return SUCCESS;
}
Status GraphManager::SetFuzzCompileFlag(ComputeGraphPtr &compute_graph) {
for (const auto &node : compute_graph->GetAllNodes()) {
OpDescPtr op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
GELOGD("Fuzz compile flag is %d.", GetLocalOmgContext().fuzz_compile_flag);
if (!AttrUtils::SetBool(op_desc, ATTR_NAME_FUZZ_BUILD, GetLocalOmgContext().fuzz_compile_flag)) {
GELOGE(FAILED, "[Set][ATTR_NAME_FUZZ_BUILD]Failed to set fuzz build attr to %s.", op_desc->GetName().c_str());
return FAILED;
}
}
return SUCCESS;
}
Status GraphManager::SubexpressionMigration(ComputeGraphPtr &compute_graph) {
PassManager pass_manager;
GE_CHK_STATUS_RET(pass_manager.AddPass("SubexpressionMigrationPass", new (std::nothrow) SubexpressionMigrationPass));

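Besides the new SetFuzzCompileFlag call, PreRun also drops a redundant declaration: the inner Status ret shadowed the ret defined earlier in the function, which was safe only because of the immediate early return. A standalone illustration of why such shadowing is fragile (Step and the function name are hypothetical):

int Step() { return 7; }  // stand-in for PreRunAfterOptimizeSubGraph

int RunWithShadowing() {
  int ret = 0;
  {
    int ret = Step();              // inner ret shadows the outer one
    if (ret != 0) { return ret; }  // correct only thanks to this early return
  }
  return ret;  // outer ret is still 0; any later check would miss Step()'s status
}
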
@@ -358,6 +358,7 @@
ComputeGraphPtr &compute_graph,
GeRootModelPtr &ge_root_model,
uint64_t session_id);
Status SetFuzzCompileFlag(ComputeGraphPtr &compute_graph);
Status CopySubGraphAndMarkFusion(const ComputeGraphPtr &compute_graph,
Graph2SubGraphInfoList &sub_graph_map,

@@ -0,0 +1,88 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "graph/passes/mark_node_unknown_shape_pass.h"
#include "graph/utils/node_utils.h"
#include "graph/debug/ge_attr_define.h"
namespace ge {
namespace {
const char *const kEngineNameAiCore = "AIcoreEngine";
const int32_t kDynamicState = -2;
}
Status MarkNodeUnknownShapePass::Run(ComputeGraphPtr graph) {
GE_CHECK_NOTNULL(graph);
if (IsAllAicoreSupportDyn(graph)) {
if (UpdateNodeShapeToUnknown(graph) != SUCCESS) {
GELOGE(FAILED, "[Update][Node_Shape]Failed to update node shape to unknown.");
return FAILED;
}
}
return SUCCESS;
}
bool MarkNodeUnknownShapePass::IsAllAicoreSupportDyn(ComputeGraphPtr &graph) {
bool is_all_aicore_support_dyn = false;
for (const auto &node : graph->GetAllNodes()) {
if (node->GetOpDesc() == nullptr) {
continue;
}
if (node->GetOpDesc()->GetOpKernelLibName() != kEngineNameAiCore) {
GELOGD("Kernel of %s is %s.", node->GetName().c_str(), node->GetOpDesc()->GetOpKernelLibName().c_str());
continue;
}
if (AttrUtils::HasAttr(node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS)) {
GELOGD("%s has set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
is_all_aicore_support_dyn = true;
} else {
GELOGD("%s has not set ATTR_NAME_FUZZ_BUILD_RES_ATTRS.", node->GetName().c_str());
is_all_aicore_support_dyn = false;
break;
}
}
return is_all_aicore_support_dyn;
}
Status MarkNodeUnknownShapePass::UpdateNodeShapeToUnknown(ComputeGraphPtr &graph) {
GELOGD("Need to update node shape to dynamic when fuzz build result is obtained.");
for (const auto &node : graph->GetAllNodes()) {
if (NodeUtils::IsConst(*node)) {
continue;
}
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
auto src_node = NodeUtils::GetInDataNodeByIndex(*node, static_cast<int>(i));
if (src_node != nullptr && (NodeUtils::IsConst(*src_node) || src_node->GetType() == VARIABLE)) {
continue;
}
GELOGD("Update shape for %s.", node->GetName().c_str());
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
if (input_desc != nullptr) {
input_desc->SetShape(GeShape({kDynamicState}));
}
}
for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
if (output_desc != nullptr) {
output_desc->SetShape(GeShape({kDynamicState}));
}
}
}
return SUCCESS;
}
} // namespace ge

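The pass relies on GE's shape encoding: a whole shape of {-2} (kDynamicState above, kDynamicDimValue in ge_generator.cc) marks unknown rank, and a -1 dimension marks an unknown dim. A small standalone sketch of that convention, assuming the usual GE encoding (helper names are illustrative):

#include <cstdint>
#include <vector>

const int64_t kUnknownRank = -2;  // matches kDynamicState in the pass above
const int64_t kUnknownDim = -1;

bool IsUnknownRank(const std::vector<int64_t> &dims) {
  return dims.size() == 1 && dims[0] == kUnknownRank;
}

bool IsDynamicShape(const std::vector<int64_t> &dims) {
  if (IsUnknownRank(dims)) {
    return true;
  }
  for (int64_t dim : dims) {
    if (dim == kUnknownDim) {
      return true;
    }
  }
  return false;
}
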
@@ -0,0 +1,32 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
#define GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_
#include "graph/graph.h"
#include "inc/graph_pass.h"
namespace ge {
class MarkNodeUnknownShapePass : public GraphPass {
public:
Status Run(ComputeGraphPtr graph);
private:
bool IsAllAicoreSupportDyn(ComputeGraphPtr &graph);
Status UpdateNodeShapeToUnknown(ComputeGraphPtr &graph);
};
} // namespace ge
#endif // GE_GRAPH_PASSES_MARK_NODE_UNKNOWN_SHAPE_PASS_H_

@@ -55,9 +55,17 @@ Status InsertReshapeIfNeed(const NodePtr &node) {
GE_CHECK_NOTNULL(dst_node->GetOpDesc());
auto dst_tensor = dst_node->GetOpDesc()->GetInputDescPtr(dst_anchor->GetIdx());
GE_CHECK_NOTNULL(dst_tensor);
bool is_need_insert_reshape = src_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
dst_tensor->GetShape().GetDims() != UNKNOWN_RANK &&
src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims();
bool is_dynamic = false;
auto src_tensor_dims = src_tensor->GetShape().GetDims();
auto dst_tensor_dims = dst_tensor->GetShape().GetDims();
if ((!(std::all_of(src_tensor_dims.begin(), src_tensor_dims.end(), [](int64_t val) { return val >= 0 ; })))
|| (!(std::all_of(dst_tensor_dims.begin(), dst_tensor_dims.end(), [](int64_t val) { return val >= 0; })))) {
GELOGD("No need to insert reshape node between %s and %s.", node->GetName().c_str(),
dst_node->GetName().c_str());
is_dynamic = true;
}
bool is_need_insert_reshape = (src_tensor->GetShape().GetDims() != dst_tensor->GetShape().GetDims()) &&
(!is_dynamic);
if (is_need_insert_reshape) {
auto reshape = CreateReshape(src_tensor, dst_tensor, node->GetOwnerComputeGraph());
GE_CHECK_NOTNULL(reshape);

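The InsertReshapeIfNeed change replaces the old UNKNOWN_RANK comparison with a stricter guard: a Reshape is inserted only when both tensors are fully static (every dim >= 0) and their dims differ. A standalone sketch of the new condition (helper names are illustrative):

#include <algorithm>
#include <cstdint>
#include <vector>

bool IsFullyStatic(const std::vector<int64_t> &dims) {
  return std::all_of(dims.begin(), dims.end(), [](int64_t dim) { return dim >= 0; });
}

bool NeedInsertReshape(const std::vector<int64_t> &src_dims, const std::vector<int64_t> &dst_dims) {
  return IsFullyStatic(src_dims) && IsFullyStatic(dst_dims) && (src_dims != dst_dims);
}
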
@@ -225,6 +225,7 @@ Status SubgraphExecutor::PrepareNodes(int group) {
if (node_item.node_type != NETOUTPUT) {
// only do shape inference and compilation for nodes with dynamic shapes.
if (node_item.is_dynamic) {
GELOGD("Need to re-infer shape when %s is dynamic.", node_item.NodeName().c_str());
auto prepare_future = pre_run_pool_.commit([this, p_node_state]() -> Status {
GetContext().SetSessionId(context_->session_id);
GetContext().SetContextId(context_->context_id);

@@ -168,12 +168,12 @@ Status NodeItem::InitInputsAndOutputs() {
Status NodeItem::ResolveDynamicState() {
(void) AttrUtils::GetBool(op_desc, ATTR_NAME_FORCE_UNKNOWN_SHAPE, is_dynamic);
GELOGD("node name = %s, is_dynamic = %d.", this->node_name.c_str(), is_dynamic);
if (!is_dynamic) {
GE_CHK_STATUS_RET(NodeUtils::GetNodeUnknownShapeStatus(*node, is_dynamic),
"[%s] Failed to get shape status.",
node->GetName().c_str());
}
GELOGD("Resolve dynamic state of %s, dynamic state is %d.", this->node_name.c_str(), is_dynamic);
return SUCCESS;
}

@@ -216,6 +216,10 @@ DEFINE_string(op_bank_path, "", "Optional; op bank path");
DEFINE_string(display_model_info, "0", "Optional; display model info");
DEFINE_string(performance_mode, "", "Optional; express high compile performance or high execute performance."
"normal: no need to recompile, use saved .o files directly;"
"high: need to recompile, high execute performance mode.");
class GFlagUtils {
public:
/**
@@ -330,7 +334,8 @@ class GFlagUtils {
"Default value: $HOME/atc_data\n"
" --op_compiler_cache_mode Set the operator compilation cache mode."
"Options are disable(default), enable and force(force to refresh the cache)\n"
" --display_model_info enable for display model info; 0(default): close display, 1: open display");
" --display_model_info enable for display model info; 0(default): close display, 1: open display\n"
" --performance_mode Set high performance mode of compile or execute when compiling a single op");
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
// Using gflags to analyze input parameters
@@ -1078,6 +1083,7 @@ static void SetEnvForSingleOp(std::map<string, string> &options) {
options.emplace(ge::OP_COMPILER_CACHE_MODE, FLAGS_op_compiler_cache_mode);
options.emplace(ge::MDL_BANK_PATH_FLAG, FLAGS_mdl_bank_path);
options.emplace(ge::OP_BANK_PATH_FLAG, FLAGS_op_bank_path);
options.emplace(ge::PERFORMANCE_MODE, FLAGS_performance_mode);
}
domi::Status GenerateSingleOp(const std::string& json_file_path) {
@@ -1124,7 +1130,7 @@ domi::Status GenerateSingleOp(const std::string& json_file_path) {
output_path = FLAGS_output + "/";
}
output_path += param.file_name;
ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path);
ret = generator.BuildSingleOpModel(param.op_desc, param.inputs, param.outputs, output_path, param.compile_flag);
if (ret != SUCCESS) {
DOMI_LOGE("Compile op failed. ge ret = %u, op index = %d", ret, index);
ret = domi::FAILED;
@@ -1229,6 +1235,8 @@ domi::Status GenerateOmModel() {
options.insert(std::pair<string, string>(string(ge::OP_BANK_PATH_FLAG), FLAGS_op_bank_path));
options.insert(std::pair<string, string>(string(ge::DISPLAY_MODEL_INFO), FLAGS_display_model_info));
options.insert(std::pair<string, string>(string(ge::PERFORMANCE_MODE), FLAGS_performance_mode));
// set enable scope fusion passes
SetEnableScopeFusionPasses(FLAGS_enable_scope_fusion_passes);
// print atc option map

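On the flag spelling above: gflags derives the variable name from the first macro argument, so the definition must read performance_mode for the FLAGS_performance_mode references in SetEnvForSingleOp and GenerateOmModel to resolve. A minimal gflags sketch of that naming rule:

#include <gflags/gflags.h>
#include <iostream>

DEFINE_string(performance_mode, "", "normal or high");

int main(int argc, char **argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  // DEFINE_string(performance_mode, ...) generates FLAGS_performance_mode;
  // any spelling mismatch between the two fails at compile time.
  std::cout << FLAGS_performance_mode << std::endl;
  return 0;
}
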
@@ -53,6 +53,7 @@ constexpr char const *kKeyOriginFormat = "origin_format";
constexpr char const *kFileSuffix = ".om";
constexpr char const *kKeyDynamicInput = "dynamic_input";
constexpr char const *kKeyDynamicOutput = "dynamic_output";
constexpr char const *kKeyCompileFlag = "compile_flag";
constexpr int kDumpJsonIndent = 2;
constexpr int kShapeRangePairSize = 2;
constexpr int kShapeRangeLow = 0;
@@ -265,7 +266,10 @@ void from_json(const Json &j, SingleOpAttr &attr) {
}
void from_json(const Json &j, SingleOpDesc &desc) {
desc.op = j.at(kKeyOp).get<string>();
auto op = j.find(kKeyOp);
if (op != j.end()) {
desc.op = j.at(kKeyOp).get<string>();
}
auto input_desc = j.find(kKeyInputDesc);
if (input_desc != j.end()) {
@@ -281,6 +285,11 @@ void from_json(const Json &j, SingleOpDesc &desc) {
if (attr_field != j.end()) {
desc.attrs = attr_field->get<vector<SingleOpAttr>>();
}
auto compile_flag = j.find(kKeyCompileFlag);
if (compile_flag != j.end()) {
desc.compile_flag = compile_flag->get<int32_t>();
}
}
Status SingleOpParser::ReadJsonFile(const std::string &file, Json &json_obj) {
@@ -583,10 +592,16 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
return ret;
}
int32_t compile_flag = 0;
for (const Json &single_op_json : single_op_list_json) {
SingleOpDesc single_op_desc;
GELOGI("Parsing op[%d], jsonStr = %s", index, single_op_json.dump(kDumpJsonIndent).c_str());
single_op_desc = single_op_json;
GELOGD("Compile flag is %d.", single_op_desc.compile_flag);
if (single_op_desc.compile_flag == 1) {
compile_flag = single_op_desc.compile_flag;
continue;
}
if (UpdateDynamicTensorName(single_op_desc.input_desc) != SUCCESS) {
GELOGE(FAILED, "[Update][DynamicTensorName] failed for invalid input param!");
REPORT_CALL_ERROR("E19999", "UpdateDynamicTensorName failed for invalid input param.");
@@ -604,6 +619,7 @@ Status SingleOpParser::ParseSingleOpList(const std::string &file, std::vector<Si
if (ret != SUCCESS) {
return ret;
}
param.compile_flag = compile_flag;
op_list.emplace_back(param);
GELOGI("Parse the index[%d] of op success", index);

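The parser treats op and compile_flag as optional keys via nlohmann::json's find/get pattern, since j.find avoids the exception j.at throws for an absent key. A self-contained sketch of the same pattern (OpDescLite and the key strings are illustrative stand-ins):

#include <cstdint>
#include <string>
#include <nlohmann/json.hpp>

struct OpDescLite {
  std::string op;
  int32_t compile_flag = 0;  // stays 0 (accurate build) when the key is absent
};

void from_json(const nlohmann::json &j, OpDescLite &desc) {
  auto op = j.find("op");
  if (op != j.end()) {
    desc.op = op->get<std::string>();
  }
  auto flag = j.find("compile_flag");
  if (flag != j.end()) {
    desc.compile_flag = flag->get<int32_t>();
  }
}

// Usage: auto d = nlohmann::json::parse(R"({"op":"Add","compile_flag":1})").get<OpDescLite>();
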
@@ -55,6 +55,7 @@ struct SingleOpDesc {
std::vector<SingleOpTensorDesc> input_desc;
std::vector<SingleOpTensorDesc> output_desc;
std::vector<SingleOpAttr> attrs;
int32_t compile_flag = 0;
};
struct SingleOpBuildParam {
@@ -62,6 +63,7 @@ struct SingleOpBuildParam {
std::vector<ge::GeTensor> inputs;
std::vector<ge::GeTensor> outputs;
std::string file_name;
int32_t compile_flag = 0;
};
void from_json(const nlohmann::json &json, SingleOpTensorDesc &desc);

@@ -34,6 +34,9 @@ const size_t kDataMemAlignSize = 32;
const size_t kDataMemAlignUnit = 2;
const string kShapeTypeDynamic = "dynamic";
const string kShapeTypeStatic = "static";
const int64_t kHostMemType = 1;
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;
const uint32_t kAlignBytes = 512;
size_t GetAlignedSize(size_t size) {
size_t aligned_size = (size + kDataMemAlignUnit * kDataMemAlignSize - 1) / kDataMemAlignSize * kDataMemAlignSize;
@@ -166,15 +169,67 @@ Status SingleOp::UpdateArgs(const std::vector<DataBuffer> &inputs, const std::ve
return SUCCESS;
}
Status SingleOp::CalInputsHostMemSize(const std::vector<DataBuffer> &inputs,
std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
int64_t total_size = 0;
size_t index = 0;
for (auto &input_buffer : inputs) {
int64_t input_size = 0;
if (input_buffer.placement == kHostMemType) {
input_size = input_buffer.length;
// pad input_size up to a 512-byte boundary (always adds at least one kAlignBytes block)
input_size = (input_size / kAlignBytes + 1) * kAlignBytes;
inputs_size.emplace_back(index, input_size);
total_size += input_size;
GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
}
index++;
}
if (total_size > kFuzzDeviceBufferSize) {
GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
return FAILED;
}
return SUCCESS;
}
Status SingleOp::UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
std::vector<DataBuffer> &update_buffers) {
if (stream_resource_->Init() != SUCCESS) {
GELOGE(FAILED, "[Malloc][Memory]Failed to malloc device buffer.");
return FAILED;
}
void *dst_addr = stream_resource_->GetDeviceBufferAddr();
// copy host mem from input_buffer to device mem of dst_addr
for (const auto &input_size : inputs_size) {
size_t index = input_size.first;
auto size = input_size.second;
GELOGD("SingleOp: do H2D for %zu input, dst addr is %p, size is %zu, src addr is %p, length is %lu.",
index, dst_addr, size, update_buffers[index].data, update_buffers[index].length);
GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length,
RT_MEMCPY_HOST_TO_DEVICE_EX, stream_));
update_buffers[index].data = dst_addr;
dst_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(dst_addr) + size);
}
return SUCCESS;
}
FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(const std::vector<DataBuffer> &inputs,
const std::vector<DataBuffer> &outputs) {
GELOGD("Start SingleOp::ExecuteAsync.");
Status ret = ValidateArgs(inputs, outputs);
if (ret != SUCCESS) {
return ret;
}
GE_CHECK_NOTNULL(stream_resource_);
vector<pair<size_t, uint64_t>> inputs_size;
GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(inputs, inputs_size));
std::lock_guard<std::mutex> lk(*stream_mutex_);
vector<DataBuffer> update_buffers = inputs;
if (!inputs_size.empty()) {
GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(inputs_size, update_buffers));
}
auto current_mem_base = stream_resource_->GetMemoryBase();
if (running_param_->mem_base != current_mem_base) {
running_param_->mem_base = const_cast<uint8_t *>(current_mem_base);
@@ -185,7 +240,11 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status SingleOp::ExecuteAsync(c
task->GetOpdesc()->GetName().c_str());
}
}
ret = UpdateArgs(inputs, outputs);
if (!inputs_size.empty()) {
ret = UpdateArgs(update_buffers, outputs);
} else {
ret = UpdateArgs(inputs, outputs);
}
if (ret != SUCCESS) {
return ret;
}
@@ -252,11 +311,100 @@ Status DynamicSingleOp::ValidateParams(const vector<GeTensorDesc> &input_desc,
return SUCCESS;
}
Status DynamicSingleOp::UpdateInputsTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
const vector<GeTensorDesc> &input_desc,
const std::vector<DataBuffer> &input_buffers) {
auto op_desc = op_task_->GetOpdesc();
GE_CHECK_NOTNULL(op_desc);
GELOGD("Start update inputs tensor value of %s.", op_desc->GetName().c_str());
for (const auto &input_size : inputs_size) {
size_t index = input_size.first;
auto tensor_desc = input_desc.at(index);
// reconstruct GeTensor by DataBuffer
GeTensorPtr ge_tensor = MakeShared<GeTensor>(tensor_desc);
GELOGD("The %zu tensor input type is host, desc data type is %d, input buffer addr is %p, size is %ld.",
index, tensor_desc.GetDataType(), input_buffers[index].data, input_buffers[index].length);
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(input_buffers[index].data),
static_cast<size_t>(input_buffers[index].length)) != SUCCESS) {
GELOGE(INTERNAL_ERROR, "[Set][Data]Failed to set data of ge tensor.");
return INTERNAL_ERROR;
}
auto tensor = op_desc->MutableInputDesc(index);
GE_CHECK_NOTNULL(tensor);
if (!AttrUtils::SetTensor(tensor, ATTR_NAME_VALUE, ge_tensor)) {
GELOGE(FAILED, "[Set][ATTR_NAME_VALUE]Failed to set ATTR_NAME_VALUE to %s.", op_desc->GetName().c_str());
return FAILED;
}
}
return SUCCESS;
}
Status DynamicSingleOp::CalInputsHostMemSize(const vector<GeTensorDesc> &input_desc,
std::vector<std::pair<size_t, uint64_t>> &inputs_size) {
int64_t total_size = 0;
size_t index = 0;
for (const auto &tensor_desc : input_desc) {
int64_t input_size = 0;
int64_t mem_type = 0;
(void)AttrUtils::GetInt(tensor_desc, ATTR_NAME_PLACEMENT, mem_type);
bool is_const = false;
(void)AttrUtils::GetBool(tensor_desc, CONST_ATTR_NAME_INPUT, is_const);
if (mem_type == kHostMemType && !is_const) {
graphStatus graph_status = TensorUtils::GetTensorMemorySizeInBytes(tensor_desc, input_size);
if (graph_status != GRAPH_SUCCESS) {
REPORT_CALL_ERROR("E19999", "Get tensor size in bytes failed in CalInputsHostMemSize.");
GELOGE(graph_status, "GetTensorMemorySizeInBytes failed!");
return FAILED;
}
// pad input_size up to a 512-byte boundary (always adds at least one kAlignBytes block)
input_size = (input_size / kAlignBytes + 1) * kAlignBytes;
inputs_size.emplace_back(index, input_size);
total_size += input_size;
GELOGD("The %zu input mem type is host, tensor size is %ld.", index, input_size);
}
index++;
}
if (total_size > kFuzzDeviceBufferSize) {
GELOGE(FAILED, "[Check][Size]Total size is %ld, larger than 1M.", total_size);
return FAILED;
}
return SUCCESS;
}
Status DynamicSingleOp::UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
std::vector<DataBuffer> &update_buffers) {
StreamResource *stream_resource = SingleOpManager::GetInstance().GetResource(resource_id_, stream_);
GE_CHECK_NOTNULL(stream_resource);
void *dst_addr = stream_resource->GetDeviceBufferAddr();
// copy host mem from input_buffer to device mem of dst_addr
for (const auto &input_size : inputs_size) {
size_t index = input_size.first;
auto size = input_size.second;
GELOGD("DynamicSingleOp: do H2D for %zu input, dst addr is %p, size is %zu, src addr is %p, length is %lu.",
index, dst_addr, size, update_buffers[index].data, update_buffers[index].length);
GE_CHK_RT_RET(rtMemcpyAsync(dst_addr, size, update_buffers[index].data, update_buffers[index].length,
RT_MEMCPY_HOST_TO_DEVICE_EX, stream_));
update_buffers[index].data = dst_addr;
dst_addr = reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(dst_addr) + size);
}
return SUCCESS;
}
Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
const vector<DataBuffer> &input_buffers,
vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &output_buffers) {
GELOGD("Start DynamicSingleOp::ExecuteAsync.");
GE_CHK_STATUS_RET_NOLOG(ValidateParams(input_desc, input_buffers, output_desc, output_buffers));
vector<pair<size_t, uint64_t>> inputs_size;
GE_CHK_STATUS_RET_NOLOG(CalInputsHostMemSize(input_desc, inputs_size));
std::lock_guard<std::mutex> lk(*stream_mutex_);
vector<DataBuffer> update_buffers = input_buffers;
if (!inputs_size.empty()) {
GE_CHK_STATUS_RET_NOLOG(UpdateInputsTensorValue(inputs_size, input_desc, input_buffers));
GE_CHK_STATUS_RET_NOLOG(UpdateInputsBufferAddr(inputs_size, update_buffers));
}
if (hybrid_model_executor_ != nullptr) {
GELOGD("Execute multi-task dynamic single op by hybrid model executor");
hybrid::HybridModelExecutor::ExecuteArgs args;
@@ -274,11 +422,12 @@ Status DynamicSingleOp::ExecuteAsync(const vector<GeTensorDesc> &input_desc,
return hybrid_model_executor_->Execute(args);
}
std::lock_guard<std::mutex> lk(*stream_mutex_);
GE_CHECK_NOTNULL(op_task_);
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
if (!inputs_size.empty()) {
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, update_buffers, output_desc, output_buffers, stream_));
} else {
GE_CHK_STATUS_RET_NOLOG(op_task_->LaunchKernel(input_desc, input_buffers, output_desc, output_buffers, stream_));
}
GE_CHK_STATUS_RET_NOLOG(op_task_->OpenDump(stream_));
GE_CHK_STATUS_RET_NOLOG(ProfilingTaskInfo(op_task_.get(), kShapeTypeDynamic));
return SUCCESS;

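A note on the 512-byte padding used by both CalInputsHostMemSize implementations: the committed expression (input_size / kAlignBytes + 1) * kAlignBytes always adds at least one full block, even when the size is already aligned. A tiny standalone comparison against the conventional round-up:

#include <cassert>
#include <cstdint>

constexpr int64_t kAlignBytes = 512;

// As committed: pads 512 -> 1024 and 513 -> 1024.
int64_t PadAsCommitted(int64_t size) { return (size / kAlignBytes + 1) * kAlignBytes; }

// Conventional round-up: pads 512 -> 512 and 513 -> 1024.
int64_t RoundUp(int64_t size) { return (size + kAlignBytes - 1) / kAlignBytes * kAlignBytes; }

int main() {
  assert(PadAsCommitted(512) == 1024);
  assert(RoundUp(512) == 512);
  return 0;
}
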
@@ -45,6 +45,10 @@ class SingleOp {
Status ValidateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status UpdateArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status GetArgs(const std::vector<DataBuffer> &inputs, const std::vector<DataBuffer> &outputs);
Status CalInputsHostMemSize(const std::vector<DataBuffer> &inputs,
std::vector<std::pair<size_t, uint64_t>> &inputs_size);
Status UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
std::vector<DataBuffer> &update_buffers);
friend class SingleOpModel;
StreamResource *stream_resource_ = nullptr;
@@ -76,7 +80,12 @@ class DynamicSingleOp {
const std::vector<DataBuffer> &inputs,
std::vector<GeTensorDesc> &output_desc,
std::vector<DataBuffer> &outputs) const;
Status CalInputsHostMemSize(const vector<GeTensorDesc> &input_desc,
std::vector<std::pair<size_t, uint64_t>> &inputs_size);
Status UpdateInputsBufferAddr(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
std::vector<DataBuffer> &update_buffers);
Status UpdateInputsTensorValue(const std::vector<std::pair<size_t, uint64_t>> &inputs_size,
const vector<GeTensorDesc> &input_desc, const std::vector<DataBuffer> &input_buffers);
std::unique_ptr<OpTask> op_task_;
std::unique_ptr<hybrid::HybridModel> hybrid_model_;
std::unique_ptr<hybrid::HybridModelExecutor> hybrid_model_executor_;
@@ -85,6 +94,7 @@
rtStream_t stream_ = nullptr;
size_t num_inputs_ = 0;
size_t num_outputs_ = 0;
ComputeGraphPtr compute_graph_;
};
} // namespace ge
#endif // GE_SINGLE_OP_SINGLE_OP_H_

@@ -83,6 +83,9 @@ StreamResource *SingleOpManager::GetResource(uintptr_t resource_id, rtStream_t s
if (it == stream_resources_.end()) {
res = new (std::nothrow) StreamResource(resource_id);
if (res != nullptr) {
if (res->Init() != SUCCESS) {
return nullptr;
}
res->SetStream(stream);
stream_resources_.emplace(resource_id, res);
}

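One detail in the GetResource hunk above: when Init() fails, the function returns nullptr without freeing the freshly allocated res, so the StreamResource leaks. A leak-free variant of the create-then-Init pattern, sketched with a hypothetical Resource type:

#include <memory>
#include <new>

struct Resource {
  bool Init() { return true; }  // stand-in for StreamResource::Init
};

Resource *CreateResource() {
  std::unique_ptr<Resource> res(new (std::nothrow) Resource());
  if (res == nullptr || !res->Init()) {
    return nullptr;  // unique_ptr frees the allocation on the failure path
  }
  return res.release();  // ownership passes to the caller / resource map
}
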
@@ -71,10 +71,10 @@ Status NeedHybridModel(GeModelPtr &ge_model, bool &flag) {
tasks[i].kernel_with_handle().context();
auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type == ccKernelType::TE) {
if (infer_depend_flag) {
flag = true;
return SUCCESS;
}
// if (infer_depend_flag) {
// flag = true;
// return SUCCESS;
// }
kernel_task_num++;
if (kernel_task_num > 1) {
flag = true;
@@ -304,6 +304,7 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
ParseArgTable(tbe_task, single_op);
tbe_task->SetModelArgs(model_name_, model_id_);
if (tbe_task->tiling_buffer_ != nullptr) {
GELOGD("tiling buffer is not nullptr.");
tbe_task->stream_resource_ = stream_resource;
}
single_op.tasks_.emplace_back(tbe_task);
@@ -472,7 +473,8 @@ Status SingleOpModel::BuildOp(StreamResource &resource, SingleOp &single_op) {
return BuildTaskList(&resource, single_op);
}
Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingleOp &single_op) {
Status SingleOpModel::BuildModelTaskKernel(StreamResource *stream_resource, const TaskDef &task_def,
DynamicSingleOp &single_op) {
auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
const auto &context = task_type == RT_MODEL_TASK_KERNEL ? task_def.kernel().context() :
task_def.kernel_with_handle().context();
@@ -483,6 +485,10 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
TbeOpTask *tbe_task = nullptr;
GE_CHK_STATUS_RET_NOLOG(BuildKernelTask(task_def, &tbe_task));
tbe_task->SetModelArgs(model_name_, model_id_);
if (tbe_task->tiling_buffer_ != nullptr) {
GELOGD("tiling buffer is not nullptr.");
tbe_task->stream_resource_ = stream_resource;
}
single_op.op_task_.reset(tbe_task);
} else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
GELOGD("Building AICPU_CC task");
@@ -504,10 +510,13 @@ Status SingleOpModel::BuildModelTaskKernel(const TaskDef &task_def, DynamicSingl
return SUCCESS;
}
Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
Status SingleOpModel::BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &single_op) {
auto ge_model = model_helper_.GetGeModel();
GE_CHECK_NOTNULL(ge_model);
auto compute_graph = GraphUtils::GetComputeGraph(ge_model->GetGraph());
GE_CHECK_NOTNULL(compute_graph);
single_op.compute_graph_ = compute_graph;
auto tasks = ge_model->GetModelTaskDefPtr()->task();
for (int i = 0; i < tasks.size(); ++i) {
const TaskDef &task_def = tasks[i];
@@ -521,7 +530,7 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) {
"BuildTaskListForDynamicOp fail for Do not support dynamic op with multiple tasks.");
return ACL_ERROR_GE_OP_TASK_TYPE_INVALID;
}
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(task_def, single_op));
GE_CHK_STATUS_RET_NOLOG(BuildModelTaskKernel(stream_resource, task_def, single_op));
} else if (task_type == RT_MODEL_TASK_KERNEL_EX) {
if (single_op.op_task_ != nullptr) {
GELOGE(ACL_ERROR_GE_OP_TASK_TYPE_INVALID, "[Check][TaskType]Do not support dynamic op with multiple tasks.");
@@ -585,6 +594,6 @@ Status SingleOpModel::BuildDynamicOp(StreamResource &resource, DynamicSingleOp &
GE_CHK_STATUS_RET(single_op.hybrid_model_executor_->Init(), "[Init][HybridModelExecutor]Failed.");
return SUCCESS;
}
return BuildTaskListForDynamicOp(single_op);
return BuildTaskListForDynamicOp(&resource, single_op);
}
} // namespace ge

@@ -65,12 +65,13 @@ class SingleOpModel {
void ParseOutputNode(const OpDescPtr &op_desc);
Status BuildTaskList(StreamResource *stream_resource, SingleOp &single_op);
Status BuildTaskListForDynamicOp(DynamicSingleOp &dynamic_single_op);
Status BuildTaskListForDynamicOp(StreamResource *stream_resource, DynamicSingleOp &dynamic_single_op);
Status BuildKernelTask(const domi::TaskDef &task_def, TbeOpTask **task);
Status BuildKernelExTask(const domi::KernelExDef &kernel_def, AiCpuTask **task,
bool dynamic_flag, bool& depend_compute_flag, uint64_t kernel_id);
Status BuildCpuKernelTask(const domi::KernelDef &kernel_def, OpTask **task, uint64_t kernel_id);
Status BuildModelTaskKernel(const domi::TaskDef &task_def, DynamicSingleOp &single_op);
Status BuildModelTaskKernel(StreamResource *stream_resource, const domi::TaskDef &task_def,
DynamicSingleOp &single_op);
static void ParseOpModelParams(ModelHelper &model_helper, SingleOpModelParam &param);
void ParseArgTable(OpTask *task, SingleOp &op);

@@ -22,6 +22,11 @@
#include "single_op/single_op_model.h"
namespace ge {
namespace {
// limit the available device buffer size to 1 MB
const uint32_t kFuzzDeviceBufferSize = 1 * 1024 * 1024;
}
StreamResource::StreamResource(uintptr_t resource_id) : resource_id_(resource_id) {
}
@@ -39,6 +44,17 @@ StreamResource::~StreamResource() {
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
}
}
if (device_buffer_ != nullptr) {
auto rt_ret = rtFree(device_buffer_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Free][Rt] failed."));
}
}
Status StreamResource::Init() {
auto rt_ret = rtMalloc(&device_buffer_, kFuzzDeviceBufferSize, RT_MEMORY_HBM);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "[Malloc][Rt] failed."); return RT_FAILED);
return SUCCESS;
}
SingleOp *StreamResource::GetOperator(const uint64_t key) {

@@ -40,6 +40,7 @@ class StreamResource {
rtStream_t GetStream() const;
void SetStream(rtStream_t stream);
Status Init();
SingleOp *GetOperator(const uint64_t key);
DynamicSingleOp *GetDynamicOperator(const uint64_t key);
@@ -49,6 +50,7 @@
uint8_t *MallocMemory(const std::string &purpose, size_t size, bool holding_lock = true);
uint8_t *MallocWeight(const std::string &purpose, size_t size);
const uint8_t *GetMemoryBase() const;
void *GetDeviceBufferAddr() const { return device_buffer_; }
private:
uint8_t *DoMallocMemory(const std::string &purpose,
@@ -65,6 +67,7 @@
rtStream_t stream_ = nullptr;
std::mutex mu_;
std::mutex stream_mu_;
void *device_buffer_ = nullptr;
};
} // namespace ge

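The new device_buffer_ member pairs rtMalloc in Init() with rtFree in the destructor. An RAII wrapper is a common alternative that cannot leak on early returns; a hypothetical sketch reusing the runtime calls that appear elsewhere in this diff (DeviceBuffer is not a GE type):

#include <cstdint>
#include "runtime/rt.h"  // rtMalloc / rtFree / RT_MEMORY_HBM, as used above

class DeviceBuffer {
 public:
  DeviceBuffer() = default;
  ~DeviceBuffer() {
    if (buf_ != nullptr) {
      (void)rtFree(buf_);  // release in exactly one place
    }
  }
  DeviceBuffer(const DeviceBuffer &) = delete;
  DeviceBuffer &operator=(const DeviceBuffer &) = delete;

  rtError_t Init(uint64_t size) { return rtMalloc(&buf_, size, RT_MEMORY_HBM); }
  void *Get() const { return buf_; }

 private:
  void *buf_ = nullptr;
};
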
@@ -333,8 +333,8 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
vector<GeTensorDesc> &output_desc,
vector<DataBuffer> &output_buffers,
rtStream_t stream) {
GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc));
GELOGD("[%s] Start to launch kernel", node_->GetName().c_str());
GE_CHK_STATUS_RET_NOLOG(UpdateRunInfo(input_desc, output_desc));
std::vector<void *> args;
for (auto &buffer : input_buffers) {
args.emplace_back(buffer.data);
@@ -354,6 +354,15 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
args.emplace_back(tiling_buffer_);
}
GELOGD("Dst addr is %p, dst size is %zu, src addr is %p, src size is %zu.",
args_.get(), arg_size_, args.data(), args.size() * sizeof(void *));
// nodes with workspace: the build stage cannot determine the workspace size, so arg_size_ must be updated at execution time
if (arg_size_ < (args.size() * sizeof(void *))) {
size_t temp_size = args.size() * sizeof(void *);
GELOGD("Need to reset size of args_ from %zu to %zu.", arg_size_, temp_size);
args_.reset(new(std::nothrow) uint8_t[temp_size]());
arg_size_ = temp_size;
}
if (memcpy_s(args_.get(), arg_size_, args.data(), args.size() * sizeof(void *)) != EOK) {
GELOGE(ACL_ERROR_GE_MEMORY_OPERATE_FAILED, "[Update][KernelArgs] failed for [%s].", node_->GetName().c_str());
REPORT_INNER_ERROR("E19999", "update kernel args failed for %s.", node_->GetName().c_str());
@@ -362,13 +371,14 @@ Status TbeOpTask::LaunchKernel(const vector<GeTensorDesc> &input_desc,
GELOGD("[%s] Start to invoke rtKernelLaunch", node_->GetName().c_str());
if (handle_ == nullptr) {
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), arg_size_, nullptr, stream));
GE_CHK_RT_RET(rtKernelLaunch(stub_func_, block_dim_, args_.get(), static_cast<uint32_t>(arg_size_),
nullptr, stream));
GELOGD("[%s] Done invoking rtKernelLaunch successfully", node_->GetName().c_str());
} else {
std::string dev_func = original_kernel_key_ + "_" + std::to_string(tiling_key_);
std::string kernel_info = node_info_ + "/" + std::to_string(tiling_key_);
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(), arg_size_, nullptr,
stream, kernel_info.c_str()));
GE_CHK_RT_RET(rtKernelLaunchWithHandle(handle_, dev_func.c_str(), block_dim_, args_.get(),
static_cast<uint32_t>(arg_size_), nullptr, stream, kernel_info.c_str()));
GELOGD("[%s] Done invoking rtKernelLaunchWithHandle successfully", node_->GetName().c_str());
}

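The LaunchKernel change grows the prebuilt args_ buffer on demand: workspace pointers are only known at execution time, so the argument count can exceed what was sized at build. A standalone sketch of that grow-then-copy step (PackArgs is an illustrative name):

#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

// Packs kernel argument pointers into a flat byte buffer, growing it when the
// launch needs more slots than were allocated at build time.
void PackArgs(std::unique_ptr<uint8_t[]> &args, size_t &arg_size,
              const std::vector<void *> &arg_ptrs) {
  const size_t needed = arg_ptrs.size() * sizeof(void *);
  if (arg_size < needed) {
    args.reset(new uint8_t[needed]());  // zero-initialized, as in the diff
    arg_size = needed;
  }
  std::memcpy(args.get(), arg_ptrs.data(), needed);
}
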
@@ -65,10 +65,12 @@ class GE_FUNC_VISIBILITY GeGenerator {
/// @param [in] inputs: input tensors.
/// @param [in] outputs: output tensors.
/// @param [in] model_file_name: name of model file.
/// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1
/// @return SUCCESS or FAILED
///
Status BuildSingleOpModel(OpDescPtr &op_desc, const std::vector<GeTensor> &inputs,
const std::vector<GeTensor> &outputs, const std::string &model_file_name);
const std::vector<GeTensor> &outputs, const std::string &model_file_name,
int32_t compile_flag = 0);
///
/// @ingroup ge
/// @brief: Build single Op into model buff.
@@ -79,8 +81,6 @@
/// @param [in] compile_flag: op build flag, accurate build is 0, fuzz build is 1
/// @param [out] model_buff: model buff of op.
/// @return SUCCESS or FAILED
Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
OpEngineType engine_type, ModelBufferData &model_buff);
Status BuildSingleOpModel(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
OpEngineType engine_type, int32_t compile_flag, ModelBufferData &model_buff);
///
@@ -100,7 +100,7 @@
ge::ModelBufferData &model, bool is_offline = true);
Status BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs,
const string &model_file_name, OpEngineType engine_type, ModelBufferData &model_buff,
bool is_offline = true);
bool is_offline = true, int32_t compile_flag = 0);
bool CheckNoAicore(const ComputeGraphPtr &graph);
void RemoveConst(const vector<GeTensor> &inputs, vector<GeTensor> &outputs);
Status CheckForSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &inputs, const vector<GeTensor> &outputs);

@@ -123,6 +123,7 @@ struct OmgContext {
bool need_multi_batch = false;
std::vector<NodePtr> data_nodes;
std::vector<NodePtr> getnext_nosink_nodes;
bool fuzz_compile_flag = false;
};
} // namespace ge

@@ -276,6 +276,7 @@ set(COMMON_SRC_FILES
"${GE_CODE_DIR}/ge/graph/passes/remove_same_const_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/useless_control_out_remove_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/parallel_group_pass.cc"
"${GE_CODE_DIR}/ge/graph/passes/mark_node_unknown_shape_pass.cc"
"${GE_CODE_DIR}/ge/model/ge_model.cc"
"${GE_CODE_DIR}/ge/common/cust_aicpu_kernel_store.cc"
"${GE_CODE_DIR}/ge/graph/load/model_manager/model_utils.cc"
@@ -703,6 +704,7 @@ set(PASS_TEST_FILES
"graph/passes/link_gen_mask_nodes_pass_unittest.cc"
"graph/passes/transpose_transdata_pass_unittest.cc"
"graph/passes/parallel_group_pass_unittest.cc"
"graph/passes/mark_node_unknown_shape_pass_unittest.cc"
)
set(KERNEL_TEST_FILES
@@ -791,6 +793,7 @@ set(SINGLE_OP_TEST_FILES
"single_op/single_op_manager_unittest.cc"
"single_op/stream_resource_unittest.cc"
"single_op/single_op_task_unittest.cc"
"single_op/single_op_unittest.cc"
)
set(PROFILING_MNG_TEST_FILES

@@ -85,7 +85,7 @@ TEST_F(UtestGeGenerator, test_build_single_op_online) {
GeGenerator generator;
generator.Initialize({});
ModelBufferData model_buffer;
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, model_buffer), FAILED);
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, false, model_buffer), FAILED);
}
TEST_F(UtestGeGenerator, test_singleop_fuzz_build) {
@@ -103,7 +103,7 @@ TEST_F(UtestGeGenerator, test_singleop_fuzz_build) {
generator.Initialize({});
ModelBufferData model_buffer;
bool compile_flag = true;
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), SUCCESS);
EXPECT_EQ(generator.BuildSingleOpModel(op_desc, inputs, outputs, ENGINE_AIVECTOR, compile_flag, model_buffer), FAILED);
}
TEST_F(UtestGeGenerator, test_check_aicore) {

@@ -0,0 +1,107 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <cstdint>
#include <memory>
#include <string>
#define private public
#include "graph/passes/mark_node_unknown_shape_pass.h"
#include "common/ge_inner_error_codes.h"
#include "inc/pass_manager.h"
#undef private
namespace ge {
class UtestMarkNodeUnknownShapePass : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
public:
NodePtr MakeNode(const ComputeGraphPtr &graph, uint32_t in_num, uint32_t out_num, string name, string type) {
GeTensorDesc test_desc(GeShape(), FORMAT_NCHW, DT_FLOAT);
auto op_desc = std::make_shared<OpDesc>(name, type);
for (auto i = 0; i < in_num; ++i) {
op_desc->AddInputDesc(test_desc);
}
for (auto i = 0; i < out_num; ++i) {
op_desc->AddOutputDesc(test_desc);
}
return graph->AddNode(op_desc);
}
void make_graph(const ComputeGraphPtr &graph) {
auto conv2d_node = MakeNode(graph, 2, 1, "conv1", "Conv2D");
{
auto data1 = MakeNode(graph, 1, 1, "data", "Data");
GeTensorDesc tensor_desc(GeShape({1,3,224,224}), FORMAT_NCHW, DT_FLOAT);
data1->GetOpDesc()->UpdateInputDesc(0, tensor_desc);
data1->GetOpDesc()->UpdateOutputDesc(0, tensor_desc);
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(0));
GraphUtils::AddEdge(data1->GetOutDataAnchor(0), conv2d_node->GetInDataAnchor(1));
}
conv2d_node->GetOpDesc()->SetOpKernelLibName("AIcoreEngine");
AttrUtils::SetBool(conv2d_node->GetOpDesc(), ATTR_NAME_FUZZ_BUILD_RES_ATTRS, true);
auto output_node = MakeNode(graph, 1, 0, "output1", "NetOutput");
GraphUtils::AddEdge(conv2d_node->GetOutDataAnchor(0), output_node->GetInDataAnchor(0));
}
};
TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_GE_kernel) {
OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL);
ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default");
op_desc->SetOpKernelLibName("GE");
graph->AddNode(op_desc);
PassManager pass;
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass);
EXPECT_EQ(pass.Run(graph), SUCCESS);
}
TEST_F(UtestMarkNodeUnknownShapePass, test_run_without_fuzz_attrs) {
OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL);
ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default");
op_desc->SetOpKernelLibName("AIcoreEngine");
graph->AddNode(op_desc);
PassManager pass;
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass);
EXPECT_EQ(pass.Run(graph), SUCCESS);
}
TEST_F(UtestMarkNodeUnknownShapePass, test_run_with_fuzz_attrs) {
ComputeGraphPtr graph = std::make_shared<ComputeGraph>("test_graph");
make_graph(graph);
PassManager pass;
pass.AddPass("MarkNodeUnknownShapePass", new (std::nothrow) MarkNodeUnknownShapePass);
EXPECT_EQ(pass.Run(graph), SUCCESS);
EXPECT_EQ(graph->GetAllNodes().size(), 3);
for (const auto &node : graph->GetAllNodes()) {
if (node->GetName() == "conv1") {
auto op_desc = node->GetOpDesc();
EXPECT_NE(op_desc, nullptr);
for (size_t i = 0; i < op_desc->GetAllInputsSize(); ++i) {
auto input_desc = op_desc->MutableInputDesc(static_cast<uint32_t>(i));
EXPECT_TRUE(input_desc->GetShape().GetDim(0) == -2);
}
for (auto &output_desc : op_desc->GetAllOutputsDescPtr()) {
EXPECT_NE(output_desc, nullptr);
EXPECT_TRUE(output_desc->GetShape().GetDim(0) == -2);
}
}
}
}
} // namespace ge

@@ -0,0 +1,108 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <vector>
#include "runtime/rt.h"
#define protected public
#define private public
#include "single_op/single_op.h"
#include "single_op/single_op_manager.h"
#undef private
#undef protected
using namespace std;
using namespace ge;
class UtestSingleOp : public testing::Test {
protected:
void SetUp() {}
void TearDown() {}
};
TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async) {
uintptr_t resource_id = 0;
std::mutex stream_mu;
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream);
vector<int64_t> dims_vec_0 = {2};
vector<GeTensorDesc> input_desc;
GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32);
// input data from device
AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 0);
input_desc.emplace_back(tensor_desc_0);
vector<DataBuffer> input_buffers;
ge::DataBuffer data_buffer;
data_buffer.data = new char[4];
data_buffer.length = 4;
input_buffers.emplace_back(data_buffer);
vector<GeTensorDesc> output_desc;
vector<DataBuffer> output_buffers;
// ValidateParams fails: num_inputs_ (default 0) does not match the single input supplied
EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), ACL_ERROR_GE_PARAM_INVALID);
}
TEST_F(UtestSingleOp, test_dynamic_singleop_execute_async1) {
uintptr_t resource_id = 0;
std::mutex stream_mu;
rtStream_t stream = nullptr;
rtStreamCreate(&stream, 0);
DynamicSingleOp dynamic_single_op(resource_id, &stream_mu, stream);
dynamic_single_op.num_inputs_ = 1;
vector<int64_t> dims_vec_0 = {2};
vector<GeTensorDesc> input_desc;
GeTensorDesc tensor_desc_0(GeShape(dims_vec_0), FORMAT_NCHW, DT_INT32);
// input data from host
AttrUtils::SetInt(tensor_desc_0, ATTR_NAME_PLACEMENT, 1);
input_desc.emplace_back(tensor_desc_0);
int64_t input_size = 0;
EXPECT_EQ(TensorUtils::GetTensorMemorySizeInBytes(tensor_desc_0, input_size), SUCCESS);
EXPECT_EQ(input_size, 64);
EXPECT_NE(SingleOpManager::GetInstance().GetResource(resource_id, stream), nullptr);
vector<DataBuffer> input_buffers;
ge::DataBuffer data_buffer;
data_buffer.data = new char[4];
data_buffer.length = 4;
input_buffers.emplace_back(data_buffer);
vector<GeTensorDesc> output_desc;
vector<DataBuffer> output_buffers;
auto *tbe_task = new (std::nothrow) TbeOpTask();
ge::OpDescPtr op_desc = std::make_shared<OpDesc>("Mul", MATMUL);
ge::ComputeGraphPtr graph = std::make_shared<ge::ComputeGraph>("default");
ge::NodePtr node = graph->AddNode(op_desc);
tbe_task->node_ = node;
dynamic_single_op.op_task_.reset((OpTask *)(tbe_task));
OpDescPtr desc_ptr = MakeShared<OpDesc>("name1", "type1");
EXPECT_EQ(desc_ptr->AddInputDesc("x", GeTensorDesc(GeShape({2}), FORMAT_NCHW)), GRAPH_SUCCESS);
dynamic_single_op.op_task_->op_desc_ = desc_ptr;
// UpdateRunInfo failed
EXPECT_EQ(dynamic_single_op.ExecuteAsync(input_desc, input_buffers, output_desc, output_buffers), PARAM_INVALID);
}