Common log optimize

pull/1328/head
liyihan2@huawei.com 4 years ago
parent 14744209b9
commit 4ef1970f6d

@ -56,7 +56,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
dump_properties.SetDumpOpSwitch(dump_op_switch);
if (dump_op_switch == kDumpoff && dump_config.dump_list.empty()) {
dump_properties_map_.emplace(kInferSessionId, dump_properties);
GELOGE(PARAM_INVALID, "Dump list is invalid,dump_op_switch is %s", dump_op_switch.c_str());
GELOGE(PARAM_INVALID, "[Check][DumpList]Failed, dump_op_switch is %s.", dump_op_switch.c_str());
REPORT_INNER_ERROR("E19999", "Check dump list failed, dump_op_switch is %s.", dump_op_switch.c_str());
return PARAM_INVALID;
}
@ -82,7 +83,8 @@ FMK_FUNC_HOST_VISIBILITY FMK_FUNC_DEV_VISIBILITY Status DumpManager::SetDumpConf
dump_path = dump_config.dump_path;
if (dump_path.empty()) {
GELOGE(PARAM_INVALID, "Dump path is empty");
GELOGE(PARAM_INVALID, "[Check][DumpPath]Failed, it is empty.");
REPORT_INNER_ERROR("E19999", "Check dump path failed, it is empty.");
return PARAM_INVALID;
}

@ -99,7 +99,8 @@ Status DumpOp::DumpOutput(aicpu::dump::Task &task) {
}
int64_t output_size = 0;
if (TensorUtils::GetTensorSizeInBytes(output_descs.at(i), output_size) != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed");
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][Param]Get output size failed, output_size:%d.", output_size);
REPORT_INNER_ERROR("E19999", "Get output size failed, output_size:%d.", output_size);
return ACL_ERROR_GE_INTERNAL_ERROR;
}
GELOGD("Get output size in lanch dump op is %ld", output_size);
@ -126,7 +127,8 @@ Status DumpOp::DumpInput(aicpu::dump::Task &task) {
}
int64_t input_size = 0;
if (TensorUtils::GetTensorSizeInBytes(input_descs.at(i), input_size) != SUCCESS) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Get output size filed");
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Get][Param]Get input size failed, input_size:%d.", input_size);
REPORT_INNER_ERROR("E19999", "Get input size failed, input_size:%d.", input_size);
return ACL_ERROR_GE_INTERNAL_ERROR;
}
GELOGD("Get input size in lanch dump op is %ld", input_size);
@ -151,30 +153,31 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
size_t proto_size = op_mapping_info.ByteSizeLong();
bool ret = op_mapping_info.SerializeToString(&proto_msg);
if (!ret || proto_size == 0) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "Protobuf serialize failed, proto_size is %zu", proto_size);
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR, "[Serialize][Protobuf]Failed, proto_size:%zu.", proto_size);
REPORT_INNER_ERROR("E19999", "Serialize protobuf failed, proto_size:%zu.", proto_size);
return ACL_ERROR_GE_INTERNAL_ERROR;
}
rtError_t rt_ret = rtMalloc(&proto_dev_mem_, proto_size, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Malloc][ProtoDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMemcpy(proto_dev_mem_, proto_size, proto_msg.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Copy][ProtoDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMalloc(&proto_size_dev_mem_, sizeof(size_t), RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMalloc failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Malloc][ProtoSizeDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMemcpy(proto_size_dev_mem_, sizeof(size_t), &proto_size, sizeof(size_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtMemcpy failed, ret: 0x%X", rt_ret);
GELOGE(rt_ret, "[Copy][ProtoSizeDevMem]Failed, ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@ -193,7 +196,7 @@ Status DumpOp::ExecutorDumpOp(aicpu::dump::OpMappingInfo &op_mapping_info) {
nullptr, // no need smDesc
stream_);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtCpuKernelLaunch failed,rt_ret:0x%X", rt_ret);
GELOGE(rt_ret, "[Call][rtCpuKernelLaunch]Failed, rt_ret:0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGI("Kernel launch dump op success");
@ -205,12 +208,12 @@ Status DumpOp::LaunchDumpOp() {
int32_t device_id = 0;
rtError_t rt_ret = rtGetDevice(&device_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(rt_ret, "Call rtGetDevice failed, ret = 0x%X, device_id = %d.", rt_ret, device_id);
GELOGE(rt_ret, "[Call][rtGetDevice]Failed, ret:0x%X, device_id:%d.", rt_ret, device_id);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
if (device_id < 0) {
GELOGE(ACL_ERROR_GE_INTERNAL_ERROR,
"Check device_id failed, device_id = %d, which should be not less than 0.",
"[Check][DeviceId]Failed, device_id:%d, which should be not less than 0.",
device_id);
return ACL_ERROR_GE_INTERNAL_ERROR;
}
@ -240,7 +243,7 @@ Status DumpOp::LaunchDumpOp() {
if (dump_properties_.GetDumpMode() == kDumpOutput) {
auto ret = DumpOutput(task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump output failed");
GELOGE(ret, "[Dump][Output]Failed, error_code:%u.", ret);
return ret;
}
op_mapping_info.mutable_task()->Add(std::move(task));
@ -248,7 +251,7 @@ Status DumpOp::LaunchDumpOp() {
if (dump_properties_.GetDumpMode() == kDumpInput) {
auto ret = DumpInput(task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump input failed");
GELOGE(ret, "[Dump][Input]Failed, error_code:%u.", ret);
return ret;
}
op_mapping_info.mutable_task()->Add(std::move(task));
@ -256,19 +259,19 @@ Status DumpOp::LaunchDumpOp() {
if (dump_properties_.GetDumpMode() == kDumpAll || dump_properties_.IsOpDebugOpen()) {
auto ret = DumpOutput(task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump output failed when in dumping all");
GELOGE(ret, "[Dump][Output]Failed when in dumping all, error_code:%u.", ret);
return ret;
}
ret = DumpInput(task);
if (ret != SUCCESS) {
GELOGE(ret, "Dump input failed when in dumping all");
GELOGE(ret, "[Dump][Input]Failed when in dumping all, error_code:%u.", ret);
return ret;
}
op_mapping_info.mutable_task()->Add(std::move(task));
}
auto ret = ExecutorDumpOp(op_mapping_info);
if (ret != SUCCESS) {
GELOGE(ret, "Executor dump op failed");
GELOGE(ret, "[Dump][Op]Failed, error_code:%u.", ret);
return ret;
}
return SUCCESS;

@ -27,14 +27,20 @@ Status OpdebugRegister::RegisterDebugForModel(rtModel_t model_handle, uint32_t o
GELOGD("Start to register debug for model in overflow");
auto ret = MallocMemForOpdebug();
if (ret != SUCCESS) {
GELOGE(ret, "Malloc memory for opdebug in model overflow failed ,ret:0x%X", ret);
GELOGE(ret, "[Malloc][MemoryForOpdebug]Failed in model overflow, ret:0x%X, op_debug_mode:%u.",
ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Malloc memory for opdebug failed in model overflow, ret:0x%X, op_debug_mode:%u.",
ret, op_debug_mode);
return ret;
}
uint32_t debug_stream_id = 0;
uint32_t debug_task_id = 0;
auto rt_ret = rtDebugRegister(model_handle, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegister error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Register][rtDebug]Failed in model overflow, ret: 0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Register rtDebug failed in model overflow, ret:0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
GELOGD("debug_task_id:%u, debug_stream_id:%u in model overflow", debug_task_id, debug_stream_id);
@ -74,7 +80,9 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
GELOGD("Start to register debug for stream in stream overflow");
auto ret = MallocMemForOpdebug();
if (ret != SUCCESS) {
GELOGE(ret, "Malloc memory for opdebug in stream overflow ,ret:0x%X", ret);
GELOGE(ret, "[Malloc][MemoryForOpdebug]Failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Malloc memory for opdebug failed in stream overflow, ret:0x%X, op_debug_mode:%u.", ret, op_debug_mode);
return ret;
}
@ -83,7 +91,10 @@ Status OpdebugRegister::RegisterDebugForStream(rtStream_t stream, uint32_t op_de
#ifdef ONLY_COMPILE_OPEN_SRC
auto rt_ret = rtDebugRegisterForStream(stream, op_debug_mode, op_debug_addr_, &debug_stream_id, &debug_task_id);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtDebugRegisterForStream error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Register][rtDebug]Failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
REPORT_INNER_ERROR("E19999", "Register rtDebug failed in stream overflow, ret:0x%X, op_debug_mode:%u.",
rt_ret, op_debug_mode);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
#endif
@ -125,7 +136,7 @@ void OpdebugRegister::UnregisterDebugForStream(rtStream_t stream) {
Status OpdebugRegister::MallocMemForOpdebug() {
rtError_t rt_ret = rtMalloc(&op_debug_addr_, kOpDebugMemorySize, RT_MEMORY_DDR);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Malloc][OpDebugMem]Failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
@ -133,16 +144,16 @@ Status OpdebugRegister::MallocMemForOpdebug() {
// For data dump, aicpu needs the pointer to pointer that save the real debug address.
rt_ret = rtMalloc(&p2p_debug_addr_, kDebugP2pSize, RT_MEMORY_HBM);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMalloc error, ret: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Malloc][P2PDebugMem]Failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
rt_ret = rtMemcpy(p2p_debug_addr_, sizeof(uint64_t), &debug_addrs_tmp, sizeof(uint64_t), RT_MEMCPY_HOST_TO_DEVICE);
if (rt_ret != RT_ERROR_NONE) {
GELOGE(RT_FAILED, "rtMemcpy to p2p_addr error: 0x%X", rt_ret);
GELOGE(RT_FAILED, "[Copy][P2PDebugMem]Failed, ret: 0x%X", rt_ret);
return RT_ERROR_TO_GE_STATUS(rt_ret);
}
return SUCCESS;
}
} // namespace ge
} // namespace ge

Loading…
Cancel
Save