!718 Synchronize latest Ascend software suite 24 Dec 2020

From: @nicholas_yhr
Reviewed-by: @liujunzhu, @youui
Signed-off-by: @youui
pull/718/MERGE
Committed by mindspore-ci-bot via Gitee
commit c762dd5dcc

@ -607,7 +607,7 @@ set(INFER_SRC_LIST
if (NOT ENABLE_D AND NOT ENABLE_ACL AND NOT ENABLE_MS_TESTCASES)
############ libge_runner.so ############
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS} $<TARGET_OBJECTS:msprofiler_fwk>)
add_library(ge_runner SHARED ${TRAIN_SRC_LIST} ${PROTO_SRCS} ${PROTO_CLIENT_SRCS})
target_compile_definitions(ge_runner PRIVATE
PROTOBUF_INLINE_NOT_IN_HEADERS=0
@ -648,11 +648,14 @@ target_include_directories(ge_runner PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)
target_link_libraries(ge_runner
target_link_libraries(ge_runner PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
adump_server
static_mmpa
-Wl,--whole-archive
msprofiler_fwk
-Wl,--no-whole-archive
-Wl,--no-as-needed
graph
ge_common
@ -712,7 +715,7 @@ target_include_directories(ge_compiler PRIVATE
${GE_CODE_DIR}/third_party/fwkacllib/inc/toolchain
)
target_link_libraries(ge_compiler
target_link_libraries(ge_compiler PRIVATE
$<BUILD_INTERFACE:intf_pub>
ge_memory
static_mmpa
@ -766,7 +769,14 @@ target_link_options(opensrc_ascendcl PRIVATE
-Wl,--allow-multiple-definition
-Wl,-z,muldefs
-Wl,-Bsymbolic
-Wl,--exclude-libs,ALL
-Wl,--exclude-libs,libascend_protobuf.a
-Wl,--exclude-libs,libge_executor.a
-Wl,--exclude-libs,libge_common.a
-Wl,--exclude-libs,libgraph.a
-Wl,--exclude-libs,libmmpa.a
-Wl,--exclude-libs,libregister.a
-Wl,--exclude-libs,liberror_manager.a
-Wl,--exclude-libs,libadump_server.a
)
target_link_libraries(opensrc_ascendcl PRIVATE
-Wl,--whole-archive

@ -94,6 +94,9 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
for (auto dim : output_descs.at(i).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : output_descs.at(i).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size failed");
@ -118,6 +121,9 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
for (auto dim : input_descs.at(i).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : input_descs.at(i).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get input size failed");
@ -214,8 +220,15 @@ Status DumpOp::LaunchDumpOp() {
SetOpMappingLoopAddr(global_step_, loop_per_iter_, loop_cond_, op_mapping_info);
GELOGI("Dump step is %s ,dump path is %s ,in Launch dump op", dump_properties_.GetDumpStep().c_str(),
dump_path.c_str());
uint32_t task_id = 0;
uint32_t stream_id = 0;
rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGW("call rtGetTaskIdAndStreamID failed, ret = 0x%X", rt_ret);
}
aicpu::dump::Task task;
task.set_task_id(task_id);
task.set_stream_id(stream_id);
task.mutable_op()->set_op_name(op_desc_->GetName());
task.mutable_op()->set_op_type(op_desc_->GetType());
if (dump_properties_.GetDumpMode() == kDumpOutput) {

@ -181,12 +181,19 @@ void TBEPluginManager::GetCustomOpPath(std::string &customop_path) {
void TBEPluginManager::LoadCustomOpLib() {
LoadPluginSo(options_);
std::string fmk_type = std::to_string(domi::TENSORFLOW);
auto it = options_.find(ge::FRAMEWORK_TYPE);
if (it != options_.end()) {
fmk_type = it->second;
}
std::vector<OpRegistrationData> registration_datas = domi::OpRegistry::Instance()->registrationDatas;
GELOGI("The size of registration_datas is: %zu", registration_datas.size());
for (OpRegistrationData reg_data : registration_datas) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
domi::OpRegistry::Instance()->Register(reg_data);
if (std::to_string(reg_data.GetFrameworkType()) == fmk_type) {
GELOGD("Begin to register optype: %s, imply_type: %s", reg_data.GetOmOptype().c_str(),
TypeUtils::ImplyTypeToSerialString(reg_data.GetImplyType()).c_str());
(void)domi::OpRegistry::Instance()->Register(reg_data);
}
}
}

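For reference, a minimal standalone sketch (not part of the diff) of the pattern the LoadCustomOpLib hunk above introduces: read the framework type from the options map with a TensorFlow default, then register only the op registration data whose framework type matches. RegData, the option key, and the registry call are illustrative stand-ins, not the GE API.

#include <map>
#include <string>
#include <vector>

struct RegData { int framework_type; std::string op_type; };  // stand-in for domi::OpRegistrationData

void RegisterMatching(const std::map<std::string, std::string> &options,
                      const std::vector<RegData> &registration_datas) {
  int fmk_type = 3;  // illustrative default, standing in for domi::TENSORFLOW
  auto it = options.find("ge.frameworkType");  // key name is an assumption
  if (it != options.end()) {
    fmk_type = std::stoi(it->second);
  }
  for (const RegData &reg_data : registration_datas) {
    if (reg_data.framework_type != fmk_type) {
      continue;  // skip registrations that belong to another framework
    }
    // ... hand reg_data to the op registry here ...
  }
}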
@ -112,7 +112,6 @@ ge::Status RegProfCtrlCallback(MsprofCtrlCallback func) {
if (ge::ProfilingManager::Instance().GetMsprofCallback().msprofCtrlCallback != nullptr) {
GELOGW("Msprof ctrl callback is exist, just ignore it.");
} else {
GELOGI("GE register Msprof ctrl callback.");
ge::ProfilingManager::Instance().SetMsprofCtrlCallback(func);
}
return ge::SUCCESS;
@ -124,7 +123,6 @@ ge::Status RegProfSetDeviceCallback(MsprofSetDeviceCallback func) {
return ge::PARAM_INVALID;
}
// Pass MsprofSetDeviceCallback to runtime
GELOGI("GE pass setdevice callback to runtime.");
ge::Status rt_ret = rtRegDeviceStateCallback(kRtSetDeviceRegName.c_str(), static_cast<rtDeviceStateCallback>(func));
if (rt_ret != ge::SUCCESS) {
GELOGE(rt_ret, "Pass MsprofSetDeviceCallback to runtime failed!");
@ -158,7 +156,7 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
if (type != kProfCommandhandleFinalize) {
GE_CHECK_NOTNULL(data);
}
ProfCommandHandleData *prof_config_param = (ProfCommandHandleData *)data;
ProfCommandHandleData *prof_config_param = reinterpret_cast<ProfCommandHandleData *>(data);
auto iter = kProfCommandTypeMap.find(type);
if (iter == kProfCommandTypeMap.end()) {
GELOGW("The prof command type is invalid.");
@ -183,7 +181,8 @@ ge::Status ProfCommandHandle(ProfCommandHandleType type, void *data, uint32_t le
if (type != kProfCommandhandleFinalize) {
command.module_index = prof_config_param->profSwitch;
}
GELOGI("GE commandhandle execute, Command Type: %d, data type config: 0x%llx", type, command.module_index);
GELOGI("GE commandhandle execute, Command Type: %s, data type config: 0x%llx", iter->second.c_str(),
command.module_index);
if (type == kProfCommandhandleStart || type == kProfCommandhandleStop) {
GELOGI("Profiling device nums:%s , deviceID:[%s]", prof_params[0].c_str(), prof_params[kDeviceListIndex].c_str());
}

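For reference, a small sketch (illustrative only, not part of the diff) of the logging improvement in ProfCommandHandle above: instead of printing the raw command type with %d, the type is looked up in a map and logged by name. The enum values and names below are invented for the example.

#include <map>
#include <string>

enum class ProfCommand { kInit, kStart, kStop, kFinalize };  // illustrative values

std::string CommandName(ProfCommand type) {
  static const std::map<ProfCommand, std::string> kNames = {
      {ProfCommand::kInit, "init"}, {ProfCommand::kStart, "start"},
      {ProfCommand::kStop, "stop"}, {ProfCommand::kFinalize, "finalize"}};
  auto iter = kNames.find(type);
  return (iter == kNames.end()) ? "unknown" : iter->second;  // mirrors the kProfCommandTypeMap lookup
}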
@ -38,10 +38,8 @@ const std::string kProfModelUnsubscribe = "prof_model_cancel_subscribe";
} // namespace
namespace ge {
ProfilingManager::ProfilingManager() : is_load_profiling_(false),
is_execute_profiling_(false),
is_training_trace_(false),
subscribe_count_(0) {
ProfilingManager::ProfilingManager()
: is_load_profiling_(false), is_execute_profiling_(false), is_training_trace_(false), subscribe_count_(0) {
prof_cb_.msprofCtrlCallback = nullptr;
prof_cb_.msprofReporterCallback = nullptr;
}
@ -102,8 +100,8 @@ ge::Status ProfilingManager::InitFromOptions(const Options &options, MsprofGeOpt
return INTERNAL_ERROR;
}
is_execute_profiling_ = true;
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(),
prof_conf.options, options.profiling_options.c_str());
GELOGI("The profiling in options is %s, %s. origin option: %s", options.profiling_mode.c_str(), prof_conf.options,
options.profiling_options.c_str());
} else {
(void)mmGetEnv("PROFILING_MODE", env_profiling_mode, MMPA_MAX_PATH);
(void)mmGetEnv("PROFILING_OPTIONS", prof_conf.options, MSPROF_OPTIONS_DEF_LEN_MAX);
@ -143,6 +141,9 @@ ge::Status ProfilingManager::ParseOptions(const std::string &options) {
}
try {
Json prof_options = Json::parse(options);
if (options.find(kTrainingTrace) == std::string::npos) {
return ge::SUCCESS;
}
const std::string training_trace = prof_options[kTrainingTrace];
if (training_trace.empty()) {
GELOGI("Training trace will not take effect.");
@ -802,32 +803,46 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY void ProfilingManager::GetFpBpP
if (!fp_point_.empty() && !bp_point_.empty()) {
fp_point = fp_point_;
bp_point = bp_point_;
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(), fp_point.c_str());
GELOGI("Bp Fp have been initialized in env or options. bp_point: %s, fp_point: %s", bp_point.c_str(),
fp_point.c_str());
return;
}
// ProfApi mode and training trace is set
try {
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
// Parse options first
char env_profiling_options[MSPROF_OPTIONS_DEF_LEN_MAX] = { 0x00 };
bool is_profiling_valid = false;
std::string profiling_options;
if (ge::GetContext().GetOption(OPTION_EXEC_PROFILING_OPTIONS, profiling_options) == SUCCESS &&
!profiling_options.empty()) {
is_profiling_valid = true;
} else {
INT32 ret = mmGetEnv("PROFILING_OPTIONS", env_profiling_options, MSPROF_OPTIONS_DEF_LEN_MAX);
if (ret != EN_OK) {
GELOGI("PROFILING_OPTIONS env is not exist.");
return;
}
GELOGI("Parse env PROFILING_OPTIONS:%s.", env_profiling_options);
Json prof_options = Json::parse(env_profiling_options);
profiling_options = env_profiling_options;
is_profiling_valid = true;
}
if (is_profiling_valid) {
try {
Json prof_options = Json::parse(profiling_options);
fp_point_ = prof_options[kFpPoint];
bp_point_ = prof_options[kBpPoint];
fp_point_ = prof_options[kFpPoint];
bp_point_ = prof_options[kBpPoint];
fp_point = fp_point_;
bp_point = bp_point_;
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
fp_point = fp_point_;
bp_point = bp_point_;
if (!fp_point_.empty() && !bp_point_.empty()) {
GELOGI("Training trace bp fp is set, bp_point:%s, fp_point:%s.", bp_point_.c_str(), fp_point_.c_str());
}
} catch (...) {
GELOGW("Json prof options is invalid.");
return;
}
} catch (...) {
GELOGE(FAILED, "Json prof options is invalid.");
return;
}
return;
}

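For reference, a minimal sketch (not part of the diff) of the restructured GetFpBpPoint flow above: take the profiling options from the GE option if present, otherwise fall back to the PROFILING_OPTIONS environment variable, and only then parse the string once as JSON inside a try/catch. The JSON keys "fp_point"/"bp_point" are assumptions standing in for kFpPoint/kBpPoint.

#include <cstdlib>
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using Json = nlohmann::json;

void ParseFpBp(const std::string &option_value) {
  std::string profiling_options = option_value;
  if (profiling_options.empty()) {
    const char *env = std::getenv("PROFILING_OPTIONS");  // fallback source
    if (env == nullptr) {
      return;  // nothing to parse
    }
    profiling_options = env;
  }
  try {
    Json prof_options = Json::parse(profiling_options);
    std::string fp_point = prof_options["fp_point"];  // key name assumed
    std::string bp_point = prof_options["bp_point"];  // key name assumed
    if (!fp_point.empty() && !bp_point.empty()) {
      std::cout << "fp_point=" << fp_point << ", bp_point=" << bp_point << "\n";
    }
  } catch (...) {
    std::cout << "profiling options are not valid JSON\n";  // warn instead of failing
  }
}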
@ -36,21 +36,21 @@ using Json = nlohmann::json;
namespace {
const std::string GE_PROFILING_MODULE = "Framework";
// DataTypeConfig MASK
#define PROF_ACL_API_MASK 0x0001
#define PROF_TASK_TIME_MASK 0x0002
#define PROF_AICORE_METRICS_MASK 0x0004
#define PROF_AICPU_TRACE_MASK 0x0008
#define PROF_MODEL_EXECUTE_MASK 0x0010
#define PROF_RUNTIME_API_MASK 0x0020
#define PROF_RUNTIME_TRACE_MASK 0x0040
#define PROF_SCHEDULE_TIMELINE_MASK 0x0080
#define PROF_SCHEDULE_TRACE_MASK 0x0100
#define PROF_AIVECTORCORE_METRICS_MASK 0x0200
#define PROF_SUBTASK_TIME_MASK 0x0400
#define PROF_TRAINING_TRACE_MASK 0x0800
#define PROF_HCCL_TRACE_MASK 0x1000
#define PROF_DATA_PROCESS_MASK 0x2000
#define PROF_MODEL_LOAD_MASK 0x8000000000000000
const uint64_t PROF_ACL_API_MASK = 0x0001;
const uint64_t PROF_TASK_TIME_MASK = 0x0002;
const uint64_t PROF_AICORE_METRICS_MASK = 0x0004;
const uint64_t PROF_AICPU_TRACE_MASK = 0x0008;
const uint64_t PROF_MODEL_EXECUTE_MASK = 0x0010;
const uint64_t PROF_RUNTIME_API_MASK = 0x0020;
const uint64_t PROF_RUNTIME_TRACE_MASK = 0x0040;
const uint64_t PROF_SCHEDULE_TIMELINE_MASK = 0x0080;
const uint64_t PROF_SCHEDULE_TRACE_MASK = 0x0100;
const uint64_t PROF_AIVECTORCORE_METRICS_MASK = 0x0200;
const uint64_t PROF_SUBTASK_TIME_MASK = 0x0400;
const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800;
const uint64_t PROF_HCCL_TRACE_MASK = 0x1000;
const uint64_t PROF_DATA_PROCESS_MASK = 0x2000;
const uint64_t PROF_MODEL_LOAD_MASK = 0x8000000000000000;
} // namespace
namespace ge {
@ -80,7 +80,8 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ProfilingManager {
bool ProfilingTrainingTraceOn() const { return is_training_trace_; }
bool ProfilingModelLoadOn() const { return is_load_profiling_; }
bool ProfilingModelExecuteOn() const;
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; } // is_execute_profiling_ only used by ge option and env
// is_execute_profiling_ only used by ge option and env
bool ProfilingOn() const { return is_load_profiling_ && is_execute_profiling_; }
void ReportProfilingData(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,
const std::vector<ComputeGraphDescInfo> &compute_graph_desc_info);
void ProfilingTaskDescInfo(uint32_t model_id, const std::vector<TaskDescInfo> &task_desc_info,

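For reference, a short sketch (not part of the diff) of how the typed mask constants above are used: the profiling DataTypeConfig word is tested bit by bit, which is why the masks are declared as scoped, typed uint64_t constants rather than unscoped #define macros.

#include <cstdint>
#include <cstdio>

const uint64_t PROF_TASK_TIME_MASK = 0x0002;       // values copied from the diff above
const uint64_t PROF_TRAINING_TRACE_MASK = 0x0800;

void PrintEnabled(uint64_t module_index) {
  if (module_index & PROF_TASK_TIME_MASK) {
    std::printf("task time profiling enabled\n");
  }
  if (module_index & PROF_TRAINING_TRACE_MASK) {
    std::printf("training trace enabled\n");
  }
}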
@ -15,6 +15,7 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}
message Input {
@ -23,6 +24,7 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}
enum BufferType {

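For reference, a minimal sketch (not part of the diff) of how the new origin_shape fields are filled from the C++ side, matching the dump_op.cc and data_dumper.cc hunks in this PR; the generated header name below is an assumption.

#include <cstdint>
#include <vector>
#include "dump_task.pb.h"  // assumed name of the header generated from the proto above

void FillOutputShapes(aicpu::dump::Output &output, const std::vector<int64_t> &dims,
                      const std::vector<int64_t> &origin_dims) {
  for (int64_t dim : dims) {
    output.mutable_shape()->add_dim(dim);          // runtime shape
  }
  for (int64_t dim : origin_dims) {
    output.mutable_origin_shape()->add_dim(dim);   // original (pre-format-transform) shape
  }
}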
@ -209,19 +209,6 @@ bool IsDynmaicDimsSizeMatchModel(const vector<uint64_t> cur_dynamic_dims,
namespace ge {
bool GeExecutor::isInit_ = false;
class ModelListenerAdapter : public ModelListener {
public:
domi::Status OnComputeDone(uint32_t model_id, uint32_t dataIndex, uint32_t resultCode,
std::vector<ge::OutputTensorInfo> &outputs) {
if (listener == nullptr) {
GELOGE(ge::FAILED, "listener is null.");
return FAILED;
}
return listener->OnComputeDone(model_id, dataIndex, resultCode, outputs);
}
std::shared_ptr<ge::ModelListener> listener;
};
static void InitOpsProtoManger() {
string opsproto_path;
@ -573,60 +560,6 @@ Status GeExecutor::SetDynamicAippData(uint32_t model_id, void *dynamic_input_add
return SUCCESS;
}
// Load model
Status GeExecutor::LoadModelOffline(uint32_t &model_id, const std::string &path, const std::string &key,
int32_t priority, std::shared_ptr<ge::ModelListener> listener) {
GELOGI("load model offline begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}
string filePath = RealPath(path.c_str());
if (filePath.empty()) {
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID,
"File path is invalid. please check your text file '%s'.", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}
std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;
Status ret = GraphLoader::LoadModelFromFile(path, key, priority, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModelFromFile failed");
return ACL_ERROR_GE_LOAD_MODEL;
}
return SUCCESS;
}
Status GeExecutor::LoadModel(uint32_t &model_id, const ModelData &model_data,
std::shared_ptr<ge::ModelListener> listener) {
GELOGI("Load model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}
std::shared_ptr<ModelListenerAdapter> listener_adapter = MakeShared<ModelListenerAdapter>();
if (listener_adapter == nullptr) {
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "ModelListenerAdapter make shared failed!");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
listener_adapter->listener = listener;
Status ret = GraphLoader::LoadModel(model_data, listener_adapter, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "[GeExecutor] LoadModel failed.");
return ACL_ERROR_GE_LOAD_MODEL;
}
return ret;
}
Status GeExecutor::UnloadModel(uint32_t model_id) {
GELOGD("unload model %u begin.", model_id);
if (!isInit_) {
@ -659,21 +592,6 @@ Status GeExecutor::UnloadModel(uint32_t model_id) {
return SUCCESS;
}
Status GeExecutor::RunModel(const ge::RunModelData &input_data, ge::RunModelData &output_data) {
GELOGI("run model begin.");
if (!isInit_) {
GELOGE(ACL_ERROR_GE_EXEC_NOT_INIT, "GeExecutor has not been initialized!");
return ACL_ERROR_GE_EXEC_NOT_INIT;
}
InputData inputs;
GetDomiInputData(input_data, inputs);
OutputData outputs;
GetDomiOutputData(output_data, outputs);
return GraphExecutor::DataInput(inputs, outputs);
}
// Get input and output descriptor
Status GeExecutor::GetModelDescInfo(uint32_t model_id, std::vector<ge::TensorDesc> &input_desc,
std::vector<ge::TensorDesc> &output_desc, bool new_model_desc) {

@ -15,6 +15,7 @@ message Output {
int32 original_output_data_type = 7;
int32 original_output_format = 8;
uint64 size = 9;
Shape origin_shape = 10;
}
message Input {
@ -23,6 +24,7 @@ message Input {
Shape shape = 3;
uint64 address = 4;
uint64 size = 5;
Shape origin_shape = 6;
}
enum BufferType {

@ -39,7 +39,7 @@ namespace {
} \
ge_tensor = MakeShared<GeTensor>(out_desc); \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGI("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
GELOGD("node:%s allocate output %zu success, size=%lld", op_desc->GetName().c_str(), i, data_num * sizeof(TYPE));\
if (ge_tensor->SetData(reinterpret_cast<uint8_t *>(buf.get()), data_num * sizeof(TYPE)) != GRAPH_SUCCESS) { \
GELOGE(MEMALLOC_FAILED, "Set data for output %zu of node %s failed.", i, op_desc->GetName().c_str()); \
return MEMALLOC_FAILED; \
@ -50,8 +50,7 @@ namespace {
} else { \
ge_tensor = outputs[i]; \
GE_CHECK_NOTNULL(ge_tensor); \
GELOGI("node:%s existed output %zu, addr=%p, size=%lld", op_desc->GetName().c_str(), i, \
reinterpret_cast<const uint8_t *>(ge_tensor->GetData().data()), ge_tensor->GetData().size()); \
GELOGD("node:%s existed output %zu", op_desc->GetName().c_str(), i); \
} \
auto tensor = TensorAdapter::AsTensor(*ge_tensor); \
auto tensor_name = op_desc->GetOutputNameByIndex(i); \

@ -563,6 +563,19 @@ Status GeGenerator::GenerateModel(const Graph &graph, const string &file_name_pr
GE_CHECK_NOTNULL(ge_root_model);
GE_CHECK_NOTNULL(ge_root_model->GetRootGraph());
ModelHelper model_helper;
string model_name = "";
Status name_ret = model_helper.GetModelNameFromMergedGraphName(ge_root_model->GetRootGraph()->GetName(),
model_name);
if (name_ret != SUCCESS) {
ErrorManager::GetInstance().ATCReportErrMessage("E10000", {"parameter"}, {"output"});
GELOGE(FAILED, "Get model_name failed. Param --output is invalid.");
return PARAM_INVALID;
}
map<string, GeModelPtr> name_to_ge_model = ge_root_model->GetSubgraphInstanceNameToModel();
GeModelPtr &ge_model = name_to_ge_model[ge_root_model->GetRootGraph()->GetName()];
GE_RETURN_WITH_LOG_IF_FALSE(ge_model != nullptr, "ge_model cannot be null");
ge_model->SetName(model_name);
ret = impl_->SaveRootModel(file_name_prefix, ge_root_model, model);
if (ret != SUCCESS) {
GELOGE(ret, "Save model failed");

@ -99,7 +99,7 @@ Status GraphMemoryAssigner::AssignMemory() {
MemoryOffset memory_offset(RT_MEMORY_HBM, mem_assigner->GetMemOffset());
memory_offset_.emplace(RT_MEMORY_HBM, memory_offset);
if (mem_assigner->GetP2PMemOffset() > 0) {
if (mem_assigner->GetP2PMemOffset() >= 0) {
MemoryOffset p2p_memory_offset(RT_MEMORY_P2P_DDR, mem_assigner->GetP2PMemOffset());
memory_offset_.emplace(RT_MEMORY_P2P_DDR, p2p_memory_offset);
}

@ -48,26 +48,41 @@ void StreamGraphOptimizer::RefreshNodeId(const ComputeGraphPtr &comp_graph, Grap
}
}
bool StreamGraphOptimizer::IsSameStreamId(const ComputeGraphPtr &comp_graph) {
bool StreamGraphOptimizer::IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph) {
if (comp_graph == nullptr) {
return false;
}
std::set<int64_t> stream_set;
std::set<std::string> label_set;
for (const ge::NodePtr &cur_node : comp_graph->GetDirectNode()) {
GE_IF_BOOL_EXEC(cur_node->GetOpDesc() == nullptr, continue);
int64_t stream_id = cur_node->GetOpDesc()->GetStreamId();
if (stream_id == kInvalidStream) {
continue;
}
GELOGD("Node %s in subgraph %s stream id is: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
stream_set.insert(stream_id);
std::string batch_label;
if (AttrUtils::GetStr(cur_node->GetOpDesc(), ATTR_NAME_BATCH_LABEL, batch_label)) {
label_set.insert(batch_label);
} else {
GELOGD("Node %s[%s] has no batch label, subgraph %s, stream id: %ld", cur_node->GetName().c_str(),
cur_node->GetType().c_str(), comp_graph->GetName().c_str(), stream_id);
continue;
}
GELOGD("Node %s in subgraph %s stream id: %ld, node num: %zu", cur_node->GetName().c_str(),
comp_graph->GetName().c_str(), stream_id, comp_graph->GetDirectNodesSize());
}
if (stream_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id, node num: %zu, different stream num: %zu.",
if (stream_set.size() > 1 || label_set.size() > 1) {
GELOGI("Nodes of graph: %s have different stream id or batch_label, node num: %zu, different stream num: %zu.",
comp_graph->GetName().c_str(), comp_graph->GetDirectNodesSize(), stream_set.size());
return false;
}
if (!label_set.empty()) {
(void)AttrUtils::SetStr(comp_graph, ATTR_NAME_BATCH_LABEL, *label_set.begin());
}
return true;
}
@ -99,8 +114,8 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
continue;
}
if (!IsSameStreamId(subgraph)) {
GELOGI("There are more than one stream in subgraph %s", subgraph->GetName().c_str());
if (!IsSameStreamIdOrBatchLabel(subgraph)) {
GELOGI("There are more than one stream or batch_label in subgraph %s", subgraph->GetName().c_str());
continue;
}
OpDescPtr op_desc = nodes.at(0)->GetOpDesc();
@ -112,9 +127,11 @@ Status StreamGraphOptimizer::OptimizeStreamedSubGraph(const ComputeGraphPtr &com
return FAILED;
}
run_context.stream = run_context.graphStreamList[stream_id];
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu.",
subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)));
std::string batch_label;
(void)AttrUtils::GetStr(subgraph, ATTR_NAME_BATCH_LABEL, batch_label);
GELOGD("Subgraph has same stream id, subgraph: %s, engine_name: %s, stream_id: %ld, rtstream: %lu, "
"batch_label: %s", subgraph->GetName().c_str(), engine_name.c_str(), stream_id,
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(run_context.stream)), batch_label.c_str());
for (auto iter = graph_optimizers.begin(); iter != graph_optimizers.end(); ++iter) {
GE_CHECK_NOTNULL(*iter);
Status ret = (*iter)->OptimizeStreamGraph(*subgraph, run_context);

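For reference, a simplified sketch (not part of the diff) of the uniformity check IsSameStreamIdOrBatchLabel above performs: collect the stream ids and, where present, the batch labels of all nodes into sets, and treat the subgraph as optimizable only when each set holds at most one value. Node here is a stand-in for the GE node/op-desc types.

#include <set>
#include <string>
#include <vector>

struct Node { long stream_id; std::string batch_label; };  // simplified stand-in

bool IsSameStreamIdOrBatchLabel(const std::vector<Node> &nodes) {
  std::set<long> stream_set;
  std::set<std::string> label_set;
  for (const Node &node : nodes) {
    stream_set.insert(node.stream_id);
    if (!node.batch_label.empty()) {
      label_set.insert(node.batch_label);  // unlabelled nodes are skipped, as in the hunk
    }
  }
  return stream_set.size() <= 1 && label_set.size() <= 1;
}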
@ -41,7 +41,7 @@ class StreamGraphOptimizer {
private:
void RefreshNodeId(const ComputeGraphPtr &comp_graph, Graph2SubGraphInfoList &subgraph_map);
bool IsSameStreamId(const ComputeGraphPtr &comp_graph);
bool IsSameStreamIdOrBatchLabel(const ComputeGraphPtr &comp_graph);
};
} // namespace ge
#endif // GE_GRAPH_BUILD_OPTIMIZE_STREAM_GRAPH_H_

@ -567,7 +567,7 @@ Status TaskGenerator::MarkFirstAndLastOps(const vector<OpDescPtr> &ops, bool is_
continue;
}
string op_type = op_desc->GetType();
if (!is_single_stream && (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0)) {
if (!op_desc->GetSubgraphInstanceNames().empty() || separator_types.count(op_type) != 0) {
continuous_op_lists.emplace_back(vector<OpDescPtr>());
} else {
continuous_op_lists.back().emplace_back(op_desc);

@ -122,14 +122,14 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
ModelData &model_data) {
Status ret;
if (!CheckInputPathValid(path)) {
GELOGE(GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return GE_EXEC_MODEL_PATH_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID, "model path is invalid: %s", path.c_str());
return ACL_ERROR_GE_EXEC_MODEL_PATH_INVALID;
}
GELOGI("Load model begin, model path is: %s", path.c_str());
if (!key_path.empty() && !CheckInputPathValid(key_path)) {
GELOGE(GE_EXEC_MODEL_KEY_PATH_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return GE_EXEC_MODEL_KEY_PATH_INVALID;
GELOGE(ACL_ERROR_GE_PARAM_INVALID, "decrypt_key path is invalid: %s", key_path.c_str());
return ACL_ERROR_GE_PARAM_INVALID;
}
ret = DavinciModelParser::LoadFromFile(path.c_str(), key_path.c_str(), priority, model_data);
@ -144,63 +144,6 @@ Status GraphLoader::LoadDataFromFile(const std::string &path, const std::string
return SUCCESS;
}
Status GraphLoader::LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id) {
Status ret;
ModelData model_data;
ret = LoadDataFromFile(path, key_path, priority, model_data);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModelFromFile: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}
ret = LoadModel(model_data, listener, model_id);
if (ret != SUCCESS) {
GELOGE(ret, "LoadModel: Load failed. ret = %u", ret);
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
}
if (model_data.model_data != nullptr) {
delete[] static_cast<char *>(model_data.model_data);
model_data.model_data = nullptr;
}
return ret;
}
Status GraphLoader::LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id) {
GELOGI("Load model begin, model_id:%u.", model_id);
// For GeOp, Open Device 0 here.
GE_CHK_RT_RET(rtSetDevice(0));
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(model_id, model_data, listener);
if (ret != SUCCESS) {
GE_CHK_RT(rtDeviceReset(0));
GELOGE(ret, "LoadModel: Load failed.");
return ret;
}
ret = model_manager->Start(model_id);
if (ret != SUCCESS) {
if (model_manager->Unload(model_id) != SUCCESS) {
GELOGE(FAILED, "LoadModel: Unload failed while trying to unload after a failed start.");
}
GELOGE(ret, "LoadModel: Start failed.");
return ret;
}
GELOGI("LoadModel: Start model success, model_id:%u.", model_id);
return SUCCESS;
}
Status GraphLoader::CommandHandle(const Command &command) {
try {
auto model_manager = ModelManager::GetInstance();
@ -225,16 +168,16 @@ Status GraphLoader::CommandHandle(const Command &command) {
}
Status GraphLoader::LoadModelFromData(uint32_t &model_id, const ModelData &model_data, void *dev_ptr,
size_t memsize, void *weight_ptr, size_t weightsize) {
size_t mem_size, void *weight_ptr, size_t weight_size) {
GELOGI("Load model begin, model_id:%u.", model_id);
// For ACL, Open Device from App.
auto model_manager = ModelManager::GetInstance();
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelOffline(
model_id, model_data, nullptr, dev_ptr, memsize, weight_ptr, weightsize);
model_id, model_data, nullptr, dev_ptr, mem_size, weight_ptr, weight_size);
if (ret != SUCCESS) {
GELOGE(ret, "Load model failed, model_id:%u.", model_id);
return ret;
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
}
GELOGI("Load model success, model_id:%u.", model_id);
return SUCCESS;
@ -259,8 +202,8 @@ Status GraphLoader::LoadModelWithQ(uint32_t &model_id, const ModelData &model_da
GE_CHECK_NOTNULL(model_manager);
Status ret = model_manager->LoadModelWithQ(model_id, model_data, input_queue_ids, output_queue_ids);
if (ret != SUCCESS) {
GELOGE(ret, "Load model with queue failed, model_id:%u.", model_id);
return ret;
GELOGE(ACL_ERROR_GE_LOAD_MODEL, "Load model with queue failed, model_id:%u.", model_id);
return ACL_ERROR_GE_LOAD_MODEL;
}
GELOGI("Load model with queue success, model_id:%u.", model_id);

@ -44,12 +44,6 @@ class GraphLoader {
static Status GetMaxUsedMemory(uint32_t model_id, uint64_t &max_size);
static Status LoadModel(const ModelData &model_data, const std::shared_ptr<ModelListener> &listener,
uint32_t &model_id);
static Status LoadModelFromFile(const std::string &path, const std::string &key_path, int32_t priority,
const std::shared_ptr<ModelListener> &listener, uint32_t &model_id);
static Status CommandHandle(const Command &command);
static Status GetMemoryInfo(int64_t &free);

@ -319,6 +319,9 @@ Status DataDumper::GenerateOutput(aicpu::dump::Output &output, const OpDesc::Vis
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
output.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
output.mutable_origin_shape()->add_dim(dim);
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(tensor_descs.at(index), output_size) != SUCCESS) {
GELOGE(PARAM_INVALID, "Get output size failed");
@ -476,6 +479,9 @@ Status DataDumper::GenerateInput(aicpu::dump::Input &input, const OpDesc::Vistor
for (auto dim : tensor_descs.at(index).GetShape().GetDims()) {
input.mutable_shape()->add_dim(dim);
}
for (auto dim : tensor_descs.at(index).GetOriginShape().GetDims()) {
input.mutable_origin_shape()->add_dim(dim);
}
int64_t input_size = 0;
if (AttrUtils::GetInt(tensor_descs.at(index), ATTR_NAME_INPUT_ORIGIN_SIZE, input_size)) {
GELOGI("Get aipp input size according to attr is %ld", input_size);

File diff suppressed because it is too large.

@ -76,6 +76,20 @@ struct timeInfo {
int64_t dumpEndTime;
};
struct TaskMemInfo {
int64_t input_size{0};
int64_t output_size{0};
int64_t weight_size{0};
int64_t workspace_size{0};
int64_t total_size{0};
};
struct ProfileInfo {
FusionOpInfo fusion_info;
TaskMemInfo memory_info;
uint32_t task_count{0};
};
enum ExecuteMode {
INITIALIZATION,
SYNCHRONIZATION,
@ -226,8 +240,6 @@ class DavinciModel {
const vector<OpDescPtr> &GetDataList() const { return data_op_list_; }
// get Op
const map<uint32_t, OpDescPtr> &GetOpList() const { return op_list_; }
OpDescPtr GetOpByIndex(uint32_t index) const {
if (op_list_.find(index) == op_list_.end()) {
return nullptr;
@ -436,10 +448,6 @@ class DavinciModel {
int64_t GetLoadEndTime() { return load_end_time_; }
Status SinkModelProfile();
Status SinkTimeProfile(const InputData &current_data);
Status ReportProfilingData();
void SaveDumpOpInfo(const RuntimeParam &model_param, const OpDescPtr &op, uint32_t task_id, uint32_t stream_id) {
@ -476,6 +484,14 @@ class DavinciModel {
void SetTotalIOAddrs(vector<void *> &io_addrs) {
total_io_addrs_.insert(total_io_addrs_.end(), io_addrs.begin(), io_addrs.end());
}
void SetHybridArgsSize(uint32_t args_size) { total_hybrid_args_size_ += args_size; }
uint32_t GetHybridArgsSize() {
return total_hybrid_args_size_;
}
void *GetCurrentHybridArgsAddr(uint32_t offset) {
void *cur_args = static_cast<char *>(hybrid_addrs_) + offset;
return cur_args;
}
void SetTotalFixedAddrsSize(string tensor_name, int64_t fix_addr_size);
int64_t GetFixedAddrsSize(string tensor_name);
void *GetCurrentFixedAddr(int64_t offset) const {
@ -494,7 +510,7 @@ class DavinciModel {
Status MallocKnownArgs();
Status UpdateKnownNodeArgs(const vector<void *> &inputs, const vector<void *> &outputs);
Status CreateKnownZeroCopyMap(const vector<void *> &inputs, const vector<void *> &outputs);
Status UpdateKnownZeroCopyAddr();
Status UpdateKnownZeroCopyAddr(vector<void *> &total_io_addrs);
void SetKnownNodeAddrNotChanged(bool base_addr_not_changed) { base_addr_not_changed_ = base_addr_not_changed; }
Status GetOrigInputInfo(uint32_t index, OriginInputInfo &orig_input_info);
@ -529,15 +545,6 @@ class DavinciModel {
struct timeInfo time_info_;
int32_t dataInputTid;
///
/// @ingroup ge
/// @brief Save Batch label Info.
/// @param [in] const OpDescPtr &op_desc
/// @param [in] uintptr_t addr: address value in args block.
/// @return None.
///
void SetBatchLabelAddr(const OpDescPtr &op_desc, uintptr_t addr);
///
/// @ingroup ge
/// @brief Copy Check input size and model op size.
@ -649,14 +656,6 @@ class DavinciModel {
///
void AdjustDataOpList(const map<uint32_t, OpDescPtr> &data_by_index);
///
/// @ingroup ge
/// @brief input zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitInputZeroCopyNodes(const NodePtr &node);
///
/// @ingroup ge
/// @brief NetOutput Op Initialize.
@ -665,30 +664,6 @@ class DavinciModel {
///
Status InitNetOutput(const NodePtr &node);
///
/// @ingroup ge
/// @brief output zero copy node Initialize.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitOutputZeroCopyNodes(const NodePtr &node);
///
/// @ingroup ge
/// @brief input zero copy node Initialize for Case.
/// @param [in] NodePtr: Data Op.
/// @return Status
///
Status InitInputBatchLabel(const NodePtr &node);
///
/// @ingroup ge
/// @brief output zero copy node Initialize for Case.
/// @param [in] NodePtr: netoutput Op.
/// @return Status
///
Status InitOutputBatchLabel(const NodePtr &node);
///
/// @ingroup ge
/// @brief Constant Op Init.
@ -837,6 +812,11 @@ class DavinciModel {
void SetDataDumperArgs(const ComputeGraphPtr &compute_graph);
Status InitModelProfile();
Status SinkModelProfile();
Status SinkTimeProfile(const InputData &current_data);
Status GenOutputTensorInfo(const OpDescPtr &op_desc, uint32_t data_index, OutputData *output_data,
std::vector<ge::OutputTensorInfo> &outputs);
@ -914,11 +894,6 @@ class DavinciModel {
std::vector<ZeroCopyTask> zero_copy_tasks_; // Task used Data or NetOutput addr.
std::set<const void *> copy_only_addrs_; // Address need copy to original place.
// {op_id, batch_label}
std::map<int64_t, std::string> zero_copy_op_id_batch_label_;
// {batch_label, addrs}
std::map<std::string, std::set<uintptr_t>> zero_copy_batch_label_addrs_;
std::vector<TaskInfoPtr> task_list_;
// rt_moodel_handle
rtModel_t rt_model_handle_;
@ -977,6 +952,8 @@ class DavinciModel {
void *args_ = nullptr;
void *args_host_ = nullptr;
void *fixed_addrs_ = nullptr;
void *hybrid_addrs_ = nullptr;
uint32_t total_hybrid_args_size_ = 0;
int64_t total_fixed_addr_size_ = 0;
std::map<const void *, void *> knonw_input_data_info_;
std::map<const void *, void *> knonw_output_data_info_;
@ -1016,6 +993,9 @@ class DavinciModel {
// key: input_index: input is merge node; value: each gear info and each output shape
std::map<size_t, std::map<vector<int64_t>, vector<int64_t>>> merge_nodes_gear_and_real_out_shape_info_;
std::vector<std::vector<int64_t>> all_gears_info_;
std::multimap<uint32_t, uint32_t> op_id_map_;
std::vector<ProfileInfo> profile_list_;
};
} // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_DAVINCI_MODEL_H_

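For reference, a minimal sketch (not part of the diff) of the accounting pattern behind the new hybrid-args members above (total_hybrid_args_size_, GetHybridArgsSize, SetHybridArgsSize, GetCurrentHybridArgsAddr): each task first reserves an offset while the running total grows, one buffer of the final total is allocated, and each task later resolves its address as base plus its offset. The class and names below are illustrative.

#include <cstdint>

class ArgsPool {
 public:
  uint32_t Reserve(uint32_t args_size) {   // pass 1: called once per task
    uint32_t offset = total_size_;
    total_size_ += args_size;
    return offset;                         // task keeps this as its args offset
  }
  uint32_t TotalSize() const { return total_size_; }
  void SetBase(void *base) { base_ = base; }   // buffer of TotalSize() bytes
  void *AddrAt(uint32_t offset) const {        // pass 2: resolve per-task address
    return static_cast<char *>(base_) + offset;
  }

 private:
  void *base_ = nullptr;
  uint32_t total_size_ = 0;
};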
@ -89,6 +89,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
if (op_type == aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY) {
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto iter = model_aicpu_kernel_.find(model_key);
if (iter != model_aicpu_kernel_.end()) {
GELOGD("kernel destroy session_id %lu, model_id %u.", session_id, model_id);
@ -176,7 +177,7 @@ Status ModelManager::KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType op_type, u
}
void ModelManager::DestroyAicpuSession(uint64_t session_id) {
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = sess_ids_.find(session_id);
if (it == sess_ids_.end()) {
GELOGI("The session: %lu not created.", session_id);
@ -205,7 +206,7 @@ void ModelManager::DestroyAicpuSession(uint64_t session_id) {
}
ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto hybrid_davinci_model = hybrid_model_map_.find(model_id);
if (hybrid_davinci_model != hybrid_model_map_.end()) {
uint64_t session_id = hybrid_davinci_model->second->GetSessionId();
@ -215,8 +216,8 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
auto it = model_map_.find(model_id);
if (it == model_map_.end()) {
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return GE_EXEC_MODEL_ID_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", model_id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}
uint64_t session_id = it->second->GetSessionId();
DestroyAicpuSession(session_id);
@ -225,7 +226,7 @@ ge::Status ModelManager::DestroyAicpuSessionForInfer(uint32_t model_id) {
ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_id) {
GELOGD("destroy aicpu kernel in session_id %lu, model_id %u.", session_id, model_id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
Status ret = KernelLaunchEx(aicpu::FWKAdapter::FWKOperateType::FWK_ADPT_KERNEL_DESTROY, session_id, model_id);
@ -238,7 +239,7 @@ ge::Status ModelManager::DestroyAicpuKernel(uint64_t session_id, uint32_t model_
}
ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_id, uint64_t kernel_id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
std::vector<uint64_t> v_aicpu_kernel;
std::string model_key = std::to_string(session_id) + "_" + std::to_string(model_id);
if (model_aicpu_kernel_.find(model_key) != model_aicpu_kernel_.end()) {
@ -250,7 +251,7 @@ ge::Status ModelManager::CreateAicpuKernel(uint64_t session_id, uint32_t model_i
}
ModelManager::~ModelManager() {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
model_map_.clear();
model_aicpu_kernel_.clear();
cust_aicpu_so_.clear();
@ -358,18 +359,18 @@ Status ModelManager::LoadModelOnline(uint32_t &model_id, const shared_ptr<ge::Ge
void ModelManager::InsertModel(uint32_t id, std::shared_ptr<DavinciModel> &davinci_model) {
GE_CHK_BOOL_EXEC(davinci_model != nullptr, return, "davinci_model ptr is null, id: %u", id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
model_map_[id] = davinci_model;
}
void ModelManager::InsertModel(uint32_t id, shared_ptr<hybrid::HybridDavinciModel> &hybrid_model) {
GE_CHK_BOOL_EXEC(hybrid_model != nullptr, return, "hybrid_model ptr is null, id: %u", id);
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
hybrid_model_map_[id] = hybrid_model;
}
Status ModelManager::DeleteModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = model_map_.find(id);
auto hybrid_model_it = hybrid_model_map_.find(id);
@ -384,22 +385,22 @@ Status ModelManager::DeleteModel(uint32_t id) {
} else if (hybrid_model_it != hybrid_model_map_.end()) {
(void)hybrid_model_map_.erase(hybrid_model_it);
} else {
GELOGE(GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return GE_EXEC_MODEL_ID_INVALID;
GELOGE(ACL_ERROR_GE_EXEC_MODEL_ID_INVALID, "model id %u does not exists.", id);
return ACL_ERROR_GE_EXEC_MODEL_ID_INVALID;
}
return SUCCESS;
}
std::shared_ptr<DavinciModel> ModelManager::GetModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = model_map_.find(id);
return (it == model_map_.end()) ? nullptr : it->second;
}
std::shared_ptr<hybrid::HybridDavinciModel> ModelManager::GetHybridModel(uint32_t id) {
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = hybrid_model_map_.find(id);
return (it == hybrid_model_map_.end()) ? nullptr : it->second;
@ -902,7 +903,7 @@ Status ModelManager::GetInputOutputDescInfo(const uint32_t model_id, vector<Inpu
}
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, GE_EXEC_MODEL_ID_INVALID,
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
davinci_model->SetModelDescVersion(new_model_desc);
@ -970,8 +971,9 @@ Status ModelManager::GetUserDesignateShapeOrder(const uint32_t model_id,
}
Status ModelManager::GetCurShape(const uint32_t model_id, std::vector<int64_t> &batch_info, int32_t &dynamic_type) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
auto davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetCurShape Failed, Invalid Model ID %u!", model_id);
davinci_model->GetCurShape(batch_info, dynamic_type);
return SUCCESS;
}
@ -984,7 +986,8 @@ Status ModelManager::GetModelAttr(uint32_t model_id, std::vector<string> &dynami
}
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHECK_NOTNULL(davinci_model);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetModelAttr Failed, Invalid Model ID %u!", model_id);
davinci_model->GetModelAttr(dynamic_output_shape_info);
return SUCCESS;
}
@ -994,9 +997,8 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
std::vector<uint32_t> &inputFormats,
std::vector<uint32_t> &outputFormats) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, PARAM_INVALID, "GetInputOutputDescInfo Failed, Invalid model id %u!",
model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetInputOutputDescInfo Failed, Invalid model id %u!", model_id);
return davinci_model->GetInputOutputDescInfoForZeroCopy(input_desc, output_desc, inputFormats, outputFormats);
}
@ -1011,18 +1013,14 @@ Status ModelManager::GetInputOutputDescInfoForZeroCopy(const uint32_t model_id,
Status ModelManager::GetAIPPInfo(const uint32_t model_id, uint32_t index, AippConfigInfo &aipp_info) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
return davinci_model->GetAIPPInfo(index, aipp_info);
}
Status ModelManager::GetAippType(uint32_t model_id, uint32_t index, InputAippType &type, size_t &aipp_index) {
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"GetAIPPInfo failed, invalid model_id is %u.",
model_id);
"GetAIPPInfo failed, invalid model_id is %u.", model_id);
return davinci_model->GetAippType(index, type, aipp_index);
}
@ -1055,7 +1053,15 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
mmTimespec timespec = mmGetTickCount();
ModelHelper model_helper;
Status ret = model_helper.LoadModel(model);
Status ret = model_helper.LoadRootModel(model);
if (model_helper.GetModelType()) {
bool is_shape_unknown = false;
GE_CHK_STATUS_RET(model_helper.GetGeRootModel()->CheckIsUnknownShape(is_shape_unknown),
"CheckIsUnknownShape failed, model id:%u", model_id);
if (is_shape_unknown || GetContext().GetHostExecFlag()) {
return DoLoadHybridModelOnline(model_id, model_helper.GetGeRootModel(), listener);
}
}
if (ret != SUCCESS) {
GELOGE(ret, "load model failed.");
return ret;
@ -1069,8 +1075,8 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
} catch (...) {
GELOGE(INTERNAL_ERROR, "Make shared failed since other exception raise");
return INTERNAL_ERROR;
GELOGE(ACL_ERROR_GE_MEMORY_ALLOCATION, "Make shared failed since other exception raise");
return ACL_ERROR_GE_MEMORY_ALLOCATION;
}
ret = davinci_model->Assign(ge_model);
if (ret != SUCCESS) {
@ -1082,7 +1088,7 @@ Status ModelManager::LoadModelOffline(uint32_t &model_id, const ModelData &model
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE || device_id < 0) {
GELOGE(RT_FAILED, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
davinci_model->SetDeviceId(device_id);
@ -1214,7 +1220,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
std::shared_ptr<DavinciModel> davinci_model = GetModel(model_id);
GE_CHK_BOOL_RET_STATUS(davinci_model != nullptr, ACL_ERROR_GE_EXEC_MODEL_ID_INVALID,
"Invalid model id %u, check weather model has been loaded or not.", model_id);
"Invalid model id %u, check whether model has been loaded or not.", model_id);
if (davinci_model->NeedDestroyAicpuKernel()) {
GELOGI("Start to destroy specified aicpu kernel.");
@ -1237,7 +1243,7 @@ Status ModelManager::ExecuteModel(uint32_t model_id, rtStream_t stream, bool asy
}
Status ModelManager::CreateAicpuSession(uint64_t session_id) {
std::lock_guard<std::mutex> lock(sess_ids_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
auto it = sess_ids_.find(session_id);
// never been created by any model
if (it == sess_ids_.end()) {
@ -1456,8 +1462,7 @@ void ModelManager::GenModelId(uint32_t *id) {
if (id == nullptr) {
return;
}
std::lock_guard<std::mutex> lock(map_mutex_);
std::lock_guard<std::recursive_mutex> lock(map_mutex_);
*id = ++max_model_id_;
}

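For reference, a simplified sketch (not part of the diff) of why map_mutex_ becomes a std::recursive_mutex in the hunks above: DestroyAicpuSessionForInfer locks the mutex and then calls DestroyAicpuSession, which locks the same mutex again on the same thread; with a plain std::mutex that second lock would deadlock. Types and members below are reduced stand-ins.

#include <cstdint>
#include <map>
#include <mutex>

class Manager {
 public:
  void DestroyAicpuSession(uint64_t session_id) {
    std::lock_guard<std::recursive_mutex> lock(map_mutex_);
    sessions_.erase(session_id);
  }
  void DestroyAicpuSessionForInfer(uint64_t session_id) {
    std::lock_guard<std::recursive_mutex> lock(map_mutex_);
    DestroyAicpuSession(session_id);  // re-enters the same mutex safely
  }

 private:
  std::recursive_mutex map_mutex_;
  std::map<uint64_t, int> sessions_;
};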
@ -353,8 +353,7 @@ class FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY ModelManager {
std::map<uint32_t, std::shared_ptr<hybrid::HybridDavinciModel>> hybrid_model_map_;
std::map<std::string, std::vector<uint64_t>> model_aicpu_kernel_;
uint32_t max_model_id_;
std::mutex map_mutex_;
std::mutex sess_ids_mutex_;
std::recursive_mutex map_mutex_;
std::mutex session_id_create_mutex_;
static ::std::mutex exeception_infos_mutex_;
uint64_t session_id_bias_;

@ -90,20 +90,18 @@ Status KernelTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *davinci
fusion_op_info_.op_index = context.op_index(); fusion_op_info_.original_op_names = original_op_names;
fusion_op_info_.op_name = op_desc_->GetName());
string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
// get bin_file_key
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
// new aicpu kernel(rtCpuKernelLaunch) no need to check function
if (kernel_type_ == ccKernelType::CCE_AI_CORE) {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
rtError_t rt_ret = rtGetFunctionByName(const_cast<char *>(kernel_def.stub_func().c_str()), &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE, GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. stub_func: %s",
kernel_def.stub_func().c_str());
return RT_ERROR_TO_GE_STATUS(rt_ret););
} else if (kernel_type_ == ccKernelType::TE) {
rtError_t rt_ret;
rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
// get bin_file_key
string session_graph_model_id;
davinci_model_->GetUniqueId(op_desc_, session_graph_model_id);
const char *bin_file_key = davinci_model_->GetRegisterStub(op_desc_->GetName(), session_graph_model_id);
rtError_t rt_ret = rtGetFunctionByName(bin_file_key, &stub_func_);
GE_IF_BOOL_EXEC(rt_ret != RT_ERROR_NONE,
GELOGE(RT_FAILED, "execute rtGetFunctionByName failed. bin_file_key: %s", bin_file_key);
return RT_ERROR_TO_GE_STATUS(rt_ret););
@ -372,7 +370,11 @@ Status KernelTaskInfo::SuperKernelDistribute() {
Status KernelTaskInfo::Distribute() {
GELOGD("KernelTaskInfo Distribute Start.");
if (davinci_model_->IsKnownNode()) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
if (kernel_type_ == ccKernelType::TE) {
args_ = davinci_model_->GetCurrentArgsAddr(args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
args_ = davinci_model_->GetCurrentHybridArgsAddr(hybrid_args_offset_);
}
GELOGI("Known node %s args addr %p, offset %u.", op_desc_->GetName().c_str(), args_, args_offset_);
}
rtError_t rt_ret = RT_ERROR_NONE;
@ -428,36 +430,31 @@ Status KernelTaskInfo::UpdateArgs() {
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_data_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc_);
vector<void *> output_data_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc_);
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
vector<void *> io_addrs;
if (!op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) {
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
io_addrs.insert(io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (kernel_type_ == ccKernelType::TE) {
vector<void *> workspace_data_addrs = ModelUtils::GetWorkspaceDataAddrs(rts_param, op_desc_);
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
} else {
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name)) {
uint32_t output_index = davinci_model_->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > output_data_addrs.size()) {
GELOGE(FAILED, "The output data addr size[%zu] and output index[%u] are inconsistent.",
output_data_addrs.size(), output_index);
return FAILED;
}
io_addrs.insert(io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
for (size_t i = 0; i < output_data_addrs.size(); ++i) {
if (i == output_index) {
void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_);
io_addrs.emplace_back(fixed_addr);
continue;
}
io_addrs.emplace_back(output_data_addrs[i]);
}
io_addrs.insert(io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
davinci_model_->SetTotalIOAddrs(io_addrs);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
davinci_model_->UpdateKnownZeroCopyAddr(io_addrs);
uintptr_t io_addr = reinterpret_cast<uintptr_t>(args_addr.get()) + sizeof(aicpu::AicpuParamHead);
auto addrs_size = sizeof(uint64_t) * io_addrs.size();
errno_t sec_ret = memcpy_s(reinterpret_cast<void *>(io_addr), addrs_size, io_addrs.data(), addrs_size);
if (sec_ret != EOK) {
GELOGE(FAILED, "memcpy failed, ret: %d", sec_ret);
return FAILED;
}
// copy args to device
rtError_t rt_ret = rtMemcpy(args_, args_size_, args_addr.get(), args_size_, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "Call rt api(rtMemcpy) failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
}
davinci_model_->SetTotalIOAddrs(io_addrs);
GELOGI("KernelTaskInfo::UpdateArgs success.");
return SUCCESS;
}
@ -533,33 +530,18 @@ Status KernelTaskInfo::UpdateL2Data(const domi::KernelDef &kernel_def) {
}
Status KernelTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) {
domi::KernelDef kernel_def = task_def.kernel();
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
// get opcontext stored in model
const domi::KernelDef &kernel_def = task_def.kernel();
const domi::KernelContext &context = kernel_def.context();
// get opdesc
op_desc_ = davinci_model->GetOpByIndex(context.op_index());
GE_CHECK_NOTNULL(op_desc_);
// alloc fixed addr
string peer_input_name;
if (AttrUtils::GetStr(op_desc_, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) {
uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name);
if (output_index > op_desc_->GetOutputsSize()) {
GELOGE(FAILED, "The output size[%zu] and output index[%u] are inconsistent.", op_desc_->GetOutputsSize(),
output_index);
return FAILED;
}
fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name);
auto tensor_desc = op_desc_->GetOutputDesc(output_index);
int64_t tensor_size = 0;
GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size));
davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size);
GELOGI("Calculate stream switch task args , tensor size is %ld, fixed addr offset %ld", tensor_size,
fixed_addr_offset_);
kernel_type_ = static_cast<ccKernelType>(context.kernel_type());
if (kernel_type_ == ccKernelType::TE) {
uint32_t args_size = kernel_def.args_size();
args_offset_ = davinci_model->GetTotalArgsSize();
davinci_model->SetTotalArgsSize(args_size);
GELOGI("kernel task name , args_size %u, args_offset %u", args_size, args_offset_);
} else if (kernel_type_ == ccKernelType::AI_CPU || kernel_type_ == ccKernelType::CUST_AI_CPU) {
hybrid_args_offset_ = davinci_model->GetHybridArgsSize();
davinci_model->SetHybridArgsSize(kernel_def.args_size());
GELOGI("aicpu kernel task name , args_size %u, args_offset %u", kernel_def.args_size(), hybrid_args_offset_);
}
return SUCCESS;
}
@ -888,7 +870,7 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
// copy args to new host memory
std::unique_ptr<uint8_t[]> args_addr(new (std::nothrow) uint8_t[args_size_]);
args_addr = std::unique_ptr<uint8_t[]>(new (std::nothrow) uint8_t[args_size_]);
GE_PRINT_DYNAMIC_MEMORY(new, "cce task physical memory.", sizeof(uint8_t) * args_size_)
errno_t sec_ret = memcpy_s(args_addr.get(), args_size_, kernel_def.args().data(), args_size_);
if (sec_ret != EOK) {
@ -896,8 +878,23 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
return FAILED;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());
if (davinci_model_->IsKnownNode()) {
return SUCCESS;
}
const RuntimeParam &rts_param = davinci_model_->GetRuntimeParam();
vector<void *> input_addrs = ModelUtils::GetInputDataAddrs(rts_param, op_desc);
vector<void *> output_addrs = ModelUtils::GetOutputDataAddrs(rts_param, op_desc);
vector<void *> io_addrs;
@ -914,19 +911,6 @@ Status KernelTaskInfo::InitAicpuTask(uint32_t op_index, const domi::KernelDef &k
}
}
auto aicpu_param_head = reinterpret_cast<aicpu::AicpuParamHead *>(args_addr.get());
const auto &ext_info = kernel_def.kernel_ext_info();
auto init_ret = InitAicpuTaskExtInfo(ext_info);
if (init_ret != SUCCESS) {
GELOGE(init_ret, "Init aicpu task ext info failed, ext_info size=%zu", ext_info.size());
return init_ret;
}
GELOGI("Node[%s] type[%s] kernel_ext_info size=%zu, aicpu_ext_info_addr_=%p", op_desc_->GetName().c_str(),
op_desc_->GetType().c_str(), ext_info.size(), aicpu_ext_info_addr_);
aicpu_param_head->extInfoAddr = reinterpret_cast<uintptr_t>(aicpu_ext_info_addr_);
aicpu_param_head->extInfoLength = static_cast<uintptr_t>(ext_info.size());
// malloc device memory for args
rtError_t rt_ret = rtMalloc(static_cast<void **>(&args_), args_size_, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {

@ -159,7 +159,9 @@ class KernelTaskInfo : public TaskInfo {
OpDescPtr op_desc_;
DavinciModel *davinci_model_;
uint32_t args_offset_ = 0;
uint32_t hybrid_args_offset_ = 0;
int64_t fixed_addr_offset_ = 0;
std::unique_ptr<uint8_t[]> args_addr = nullptr;
bool call_save_dump_ = false;
// aicpu ext_info device mem

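For reference, a minimal sketch (not part of the diff) of the AICPU args layout that the reworked UpdateArgs/InitAicpuTask above maintain: the host-side args block begins with an AicpuParamHead, the packed 64-bit I/O addresses follow it immediately, and the whole block is then copied to the device args buffer (rtMemcpy in the diff). ParamHead is a simplified stand-in for aicpu::AicpuParamHead.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

struct ParamHead { uint32_t length; uint32_t io_addr_num; };  // stand-in for aicpu::AicpuParamHead

bool PackIoAddrs(uint8_t *args_host, size_t args_size, const std::vector<uint64_t> &io_addrs) {
  size_t addrs_size = sizeof(uint64_t) * io_addrs.size();
  if (sizeof(ParamHead) + addrs_size > args_size) {
    return false;  // args block too small for the packed addresses
  }
  std::memcpy(args_host + sizeof(ParamHead), io_addrs.data(), addrs_size);
  // ... then copy args_host to the device args buffer, e.g. via rtMemcpy ...
  return true;
}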
@ -183,22 +183,18 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
addr_count_ = out_count;
}
bool ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
bool set_batch_label_flag = false;
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
auto &addrs_mapping_list = GetOutsideAddrs();
auto args_addrs = addrs_mapping_list[out_count].find(outside_addr);
if (args_addrs != addrs_mapping_list[out_count].end()) {
auto args_addrs = outside_addrs_[out_count].find(outside_addr);
if (args_addrs != outside_addrs_[out_count].end()) {
GE_CHK_STATUS(zero_copy_task.SetTaskArgsOffset(addr_val, offset), "Input args invalid.");
void *args_val = static_cast<uint8_t *>(args) + offset;
args_addrs->second.push_back(args_val);
GELOGD("[ZCPY] set copy input: virtual_addr: 0x%lx, task_addr: %p, args: %p, offset: %zu.", addr_val, args_val,
args, offset);
set_batch_label_flag = true;
}
}
return set_batch_label_flag;
}
} // namespace ge

Some files were not shown because too many files have changed in this diff.
