@@ -3065,16 +3065,8 @@ Status DavinciModel::MallocKnownArgs() {
   return SUCCESS;
 }
 
-Status DavinciModel::DistributeTask() {
-  GELOGI("do Distribute.");
-  for (auto &task : cpu_task_list_) {
-    if (task == nullptr) {
-      GELOGW("task is null");
-      continue;
-    }
-    GE_CHK_STATUS_RET(task->Distribute());
-  }
-
+void DavinciModel::SaveProfilingTaskDescInfo(const OpDescPtr &op, const TaskInfoPtr &task,
+                                             const domi::TaskDef &task_def, size_t task_index) {
   task_desc_info_.clear();
   bool flag = GetL1FusionEnableOption();
   char skt_enable_env[MMPA_MAX_PATH] = { 0x00 };
@@ -3084,30 +3076,6 @@ Status DavinciModel::DistributeTask() {
     flag = true;
   }
 
-  const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
-  for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
-    auto &task_def = model_task_def->task(task_index);
-    auto &task = task_list_.at(task_index);
-    GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index);
-    // for data dump
-    auto op_index = std::max(task_def.kernel().context().op_index(),
-                             task_def.kernel_ex().op_index());
-    OpDescPtr op = GetOpByIndex(op_index);
-    GE_CHECK_NOTNULL(op);
-
-    if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) {
-      bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo();
-      if (call_dump || is_op_debug_reg_) {
-        SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
-      }
-    }
-
-    auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
-    bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) && (task_type != RT_MODEL_TASK_KERNEL_EX);
-    GE_IF_BOOL_EXEC(no_need_profiling, continue);
-
-    SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
-    // Load task info for profiling
     TaskDescInfo task_desc_info;
     if (!om_name_.empty()) {
       task_desc_info.model_name = om_name_;
@@ -3120,6 +3088,25 @@ Status DavinciModel::DistributeTask() {
     task_desc_info.stream_id = task->GetStreamId();
     task_desc_info.shape_type = "static";
     task_desc_info.cur_iter_num = 0;
+    // task type
+    task_desc_info.task_type = kTaskTypeInvalid;
+    auto model_task_type = static_cast<rtModelTaskType_t>(task_def.type());
+    if (model_task_type == RT_MODEL_TASK_KERNEL) {
+      const domi::KernelDef &kernel_def = task_def.kernel();
+      const auto &context = kernel_def.context();
+      auto kernel_type = static_cast<ccKernelType>(context.kernel_type());
+      if (kernel_type == ccKernelType::TE) {
+        task_desc_info.task_type = kTaskTypeAicore;
+      } else if (kernel_type == ccKernelType::AI_CPU || kernel_type == ccKernelType::CUST_AI_CPU) {
+        task_desc_info.task_type = kTaskTypeAicpu;
+      } else {
+        GELOGD("Other kernel type: %u", context.kernel_type());
+      }
+    } else if (model_task_type == RT_MODEL_TASK_KERNEL_EX) {
+      task_desc_info.task_type = kTaskTypeAicpu;
+    } else {
+      GELOGD("Skip task type: %d", static_cast<int>(model_task_type));
+    }
     profiler_report_op_info_[task_desc_info.op_name] =
         std::pair<uint32_t, uint32_t>(task_desc_info.task_id, task_desc_info.stream_id);
     task_desc_info_.emplace_back(task_desc_info);
@@ -3134,6 +3121,46 @@ Status DavinciModel::DistributeTask() {
         task_desc_info_.emplace_back(task_desc_info);
       }
     }
+  return;
+}
+
+Status DavinciModel::DistributeTask() {
+  GELOGI("do Distribute.");
+  for (auto &task : cpu_task_list_) {
+    if (task == nullptr) {
+      GELOGW("task is null");
+      continue;
+    }
+    GE_CHK_STATUS_RET(task->Distribute());
+  }
+
+  const auto &model_task_def = ge_model_->GetModelTaskDefPtr();
+  for (size_t task_index = 0; task_index < task_list_.size(); ++task_index) {
+    auto &task_def = model_task_def->task(task_index);
+    auto &task = task_list_.at(task_index);
+    GE_CHECK_NOTNULL(task);
+    GE_CHK_STATUS_RET(task->Distribute(), "Task[%zu] distribute fail", task_index);
+    // for data dump
+    auto op_index = std::max(task_def.kernel().context().op_index(),
+                             task_def.kernel_ex().op_index());
+    OpDescPtr op = GetOpByIndex(op_index);
+    GE_CHECK_NOTNULL(op);
+
+    if (reinterpret_cast<void *>(task->GetDumpArgs()) != nullptr) {
+      bool call_dump = GetDumpProperties().IsLayerNeedDump(name_, om_name_, op->GetName()) && task->CallSaveDumpInfo();
+      if (call_dump || is_op_debug_reg_) {
+        SaveDumpTask(task->GetTaskID(), task->GetStreamId(), op, task->GetDumpArgs());
+      }
+    }
+
+    auto task_type = static_cast<rtModelTaskType_t>(task_def.type());
+    bool no_need_profiling = (task_type != RT_MODEL_TASK_KERNEL) && (task_type != RT_MODEL_TASK_KERNEL_EX);
+    GE_IF_BOOL_EXEC(no_need_profiling, continue);
+
+    SaveDumpOpInfo(runtime_param_, op, task->GetTaskID(), task->GetStreamId());
+
+    // save task info for profiling
+    SaveProfilingTaskDescInfo(op, task, task_def, task_index);
   }
   // launch dump kernel to aicpu
   GE_CHK_STATUS_RET(data_dumper_.LoadDumpInfo(), "Load dump info failed.");