|
|
|
@ -108,7 +108,7 @@ void CUPTICallBackFunc(void *user_data, CUpti_CallbackDomain domain, CUpti_Callb
|
|
|
|
|
|
|
|
|
|
PROFILER_ERROR_IF_NULLPTR(cb_data);
|
|
|
|
|
if (cb_data->context == nullptr) {
|
|
|
|
|
MS_LOG(DEBUG) << "callback data context is null , correlation Id:" << cb_data->correlationId
|
|
|
|
|
MS_LOG(DEBUG) << "Callback data context is null , correlation Id:" << cb_data->correlationId
|
|
|
|
|
<< " callback id:" << cb_id;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
@ -320,12 +320,12 @@ void GPUProfiler::OpsParser() {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MS_LOG(INFO) << "GPU_profiler, op_name, op_count , kernel_count, kernel_api_count,|"
|
|
|
|
|
",cupti_activity_total_time, cupti_api_call_total_time, op_host_cost_total_time,|"
|
|
|
|
|
",cupti_activity_average_time,cupti_api_call_average_time, op_host_cost_average_time,|"
|
|
|
|
|
",mem_bytes,registers_per_thread,static_shared_memory,dynamic_shared_memory"
|
|
|
|
|
",block_x,block_y,block_z,grid_x,grid_y,grid_z"
|
|
|
|
|
<< std::endl;
|
|
|
|
|
MS_LOG(DEBUG) << "GPU_profiler, op_name, op_count , kernel_count, kernel_api_count,|"
|
|
|
|
|
",cupti_activity_total_time, cupti_api_call_total_time, op_host_cost_total_time,|"
|
|
|
|
|
",cupti_activity_average_time,cupti_api_call_average_time, op_host_cost_average_time,|"
|
|
|
|
|
",mem_bytes,registers_per_thread,static_shared_memory,dynamic_shared_memory"
|
|
|
|
|
",block_x,block_y,block_z,grid_x,grid_y,grid_z"
|
|
|
|
|
<< std::endl;
|
|
|
|
|
|
|
|
|
|
std::vector<std::pair<std::string, OpInfo>> order_vec(op_info_map_.begin(), op_info_map_.end());
|
|
|
|
|
|
|
|
|
@ -335,20 +335,20 @@ void GPUProfiler::OpsParser() {
|
|
|
|
|
std::sort(order_vec.begin(), order_vec.end(), cmp_func);
|
|
|
|
|
|
|
|
|
|
for (auto iter = order_vec.begin(); iter != order_vec.end(); iter++) {
|
|
|
|
|
MS_LOG(INFO) << "GPU_profiler"
|
|
|
|
|
<< "," << iter->first << "," << iter->second.op_count << "," << iter->second.op_kernel_count << ","
|
|
|
|
|
<< iter->second.op_kernel_api_count << ","
|
|
|
|
|
<< "|," << iter->second.cupti_activity_time << "," << iter->second.cupti_api_call_time << ","
|
|
|
|
|
<< round(iter->second.op_host_cost_time) << ","
|
|
|
|
|
<< "|," << round(iter->second.cupti_activity_time / iter->second.op_count) << ","
|
|
|
|
|
<< round(iter->second.cupti_api_call_time / iter->second.op_count) << ","
|
|
|
|
|
<< round(iter->second.op_host_cost_time / iter->second.op_count) << ","
|
|
|
|
|
<< "|," << iter->second.memcpy_info.bytes << "," << iter->second.kernel_info.registers_per_thread
|
|
|
|
|
<< "," << iter->second.kernel_info.static_shared_memory << ","
|
|
|
|
|
<< iter->second.kernel_info.dynamic_shared_memory << "," << iter->second.kernel_info.block_x << ","
|
|
|
|
|
<< iter->second.kernel_info.block_y << "," << iter->second.kernel_info.block_z << ","
|
|
|
|
|
<< iter->second.kernel_info.grid_x << "," << iter->second.kernel_info.grid_y << ","
|
|
|
|
|
<< iter->second.kernel_info.grid_z << std::endl;
|
|
|
|
|
MS_LOG(DEBUG) << "GPU_profiler"
|
|
|
|
|
<< "," << iter->first << "," << iter->second.op_count << "," << iter->second.op_kernel_count << ","
|
|
|
|
|
<< iter->second.op_kernel_api_count << ","
|
|
|
|
|
<< "|," << iter->second.cupti_activity_time << "," << iter->second.cupti_api_call_time << ","
|
|
|
|
|
<< iter->second.op_host_cost_time << ","
|
|
|
|
|
<< "|," << round(iter->second.cupti_activity_time / iter->second.op_count) << ","
|
|
|
|
|
<< round(iter->second.cupti_api_call_time / iter->second.op_count) << ","
|
|
|
|
|
<< round(iter->second.op_host_cost_time / iter->second.op_count) << ","
|
|
|
|
|
<< "|," << iter->second.memcpy_info.bytes << "," << iter->second.kernel_info.registers_per_thread
|
|
|
|
|
<< "," << iter->second.kernel_info.static_shared_memory << ","
|
|
|
|
|
<< iter->second.kernel_info.dynamic_shared_memory << "," << iter->second.kernel_info.block_x << ","
|
|
|
|
|
<< iter->second.kernel_info.block_y << "," << iter->second.kernel_info.block_z << ","
|
|
|
|
|
<< iter->second.kernel_info.grid_x << "," << iter->second.kernel_info.grid_y << ","
|
|
|
|
|
<< iter->second.kernel_info.grid_z << std::endl;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -454,6 +454,7 @@ void GPUProfiler::OpDataProducerEnd() {
|
|
|
|
|
op_host_time_stop_ = GetHostTimeStamp();
|
|
|
|
|
op_time_elapsed = (op_host_time_stop_ - op_host_time_start_) / kTimeUnit;
|
|
|
|
|
}
|
|
|
|
|
MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
|
|
|
|
|
SetRunTimeData(op_name_, op_time_elapsed);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -478,7 +479,7 @@ void GPUProfiler::Stop() {
|
|
|
|
|
|
|
|
|
|
void GPUProfiler::SaveProfileData() {
|
|
|
|
|
if (profile_data_path_.empty()) {
|
|
|
|
|
MS_LOG(WARNING) << "profile_data_path is empty, skip save profile data.";
|
|
|
|
|
MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
|
|
|
|
|
} else {
|
|
|
|
|
DataSaver dataSaver;
|
|
|
|
|
dataSaver.ParseOpInfo(op_info_map_);
|
|
|
|
@ -638,7 +639,7 @@ void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
MS_LOG(WARNING) << "unknown activity type!";
|
|
|
|
|
MS_LOG(WARNING) << "Unknown activity type!";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -651,7 +652,7 @@ void CUPTIAPI GPUProfiler::AllocBuffer(uint8_t **buffer, size_t *size, size_t *m
|
|
|
|
|
MS_LOG(ERROR) << "Out of memory, activity buffer alloc failed.";
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
MS_LOG(DEBUG) << "Alloc activity buffer, buffer size: " << BUF_SIZE;
|
|
|
|
|
*size = BUF_SIZE;
|
|
|
|
|
*maxNumRecords = 0;
|
|
|
|
|
}
|
|
|
|
@ -659,12 +660,14 @@ void CUPTIAPI GPUProfiler::AllocBuffer(uint8_t **buffer, size_t *size, size_t *m
|
|
|
|
|
void CUPTIAPI GPUProfiler::ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size,
|
|
|
|
|
size_t validSize) {
|
|
|
|
|
if (!enable_flag_) {
|
|
|
|
|
MS_LOG(DEBUG) << "Profiler is not enable, skip to process activity record.";
|
|
|
|
|
free(buffer);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
CUptiResult status;
|
|
|
|
|
CUpti_Activity *record = NULL;
|
|
|
|
|
|
|
|
|
|
MS_LOG(DEBUG) << "Process activity buffer, valid size:" << validSize << ",Stream ID:" << streamId;
|
|
|
|
|
if (validSize > 0) {
|
|
|
|
|
do {
|
|
|
|
|
status = CuptiActivityGetNextRecord(buffer, validSize, &record);
|
|
|
|
|