diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc index 1e92a2eb7b..00b5524901 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc @@ -240,38 +240,6 @@ void GPUProfiler::EventLog(const Event &event) { << ",stream_id:" << event.stream_id << ",cb_id:" << event.cb_id; } -void fillActivityInfo(OpInfo *opInfo, const Event &event) { - if (event.api_type != CUPTIApiType::kActivity) { - return; - } - switch (event.activity_type) { - case ActivityType::kKernel: - opInfo->kernel_info.registers_per_thread = event.kernel_info.registers_per_thread; - opInfo->kernel_info.static_shared_memory = event.kernel_info.static_shared_memory; - opInfo->kernel_info.dynamic_shared_memory = event.kernel_info.dynamic_shared_memory; - opInfo->kernel_info.block_x = event.kernel_info.block_x; - opInfo->kernel_info.block_y = event.kernel_info.block_y; - opInfo->kernel_info.block_z = event.kernel_info.block_z; - opInfo->kernel_info.grid_x = event.kernel_info.grid_x; - opInfo->kernel_info.grid_y = event.kernel_info.grid_y; - opInfo->kernel_info.grid_z = event.kernel_info.grid_z; - break; - case ActivityType::kMemcpyH2D: - case ActivityType::kMemcpyD2H: - case ActivityType::kMemcpyH2A: - case ActivityType::kMemcpyA2H: - case ActivityType::kMemcpyA2D: - case ActivityType::kMemcpyD2A: - case ActivityType::kMemcpyP2P: - case ActivityType::kMemcpyH2H: - case ActivityType::kMemset: - case ActivityType::kMemcpyUnknown: - opInfo->memcpy_info.bytes = event.memcpy_info.bytes; - default: - break; - } -} - void GPUProfiler::OpsParser() { MS_LOG(INFO) << "Count the number of events size:" << events_.size() << " callback api:" << cupti_callback_events_count_ << " activity:" << cupti_activity_events_count_; @@ -311,7 +279,6 @@ void GPUProfiler::OpsParser() { iter->second.op_kernel_count += 1; // The time unit from ns to us iter->second.cupti_activity_time += (event.end_time_stamp - event.start_time_stamp) / kTimeUnit; - fillActivityInfo(&iter->second, event); break; } default: @@ -322,9 +289,7 @@ void GPUProfiler::OpsParser() { MS_LOG(DEBUG) << "GPU_profiler, op_name, op_count , kernel_count, kernel_api_count,|" ",cupti_activity_total_time, cupti_api_call_total_time, op_host_cost_total_time,|" - ",cupti_activity_average_time,cupti_api_call_average_time, op_host_cost_average_time,|" - ",mem_bytes,registers_per_thread,static_shared_memory,dynamic_shared_memory" - ",block_x,block_y,block_z,grid_x,grid_y,grid_z" + ",cupti_activity_average_time,cupti_api_call_average_time, op_host_cost_average_time" << std::endl; std::vector> order_vec(op_info_map_.begin(), op_info_map_.end()); @@ -342,13 +307,7 @@ void GPUProfiler::OpsParser() { << iter->second.op_host_cost_time << "," << "|," << round(iter->second.cupti_activity_time / iter->second.op_count) << "," << round(iter->second.cupti_api_call_time / iter->second.op_count) << "," - << round(iter->second.op_host_cost_time / iter->second.op_count) << "," - << "|," << iter->second.memcpy_info.bytes << "," << iter->second.kernel_info.registers_per_thread - << "," << iter->second.kernel_info.static_shared_memory << "," - << iter->second.kernel_info.dynamic_shared_memory << "," << iter->second.kernel_info.block_x << "," - << iter->second.kernel_info.block_y << "," << iter->second.kernel_info.block_z << "," - << iter->second.kernel_info.grid_x << "," << iter->second.kernel_info.grid_y << "," - << iter->second.kernel_info.grid_z << std::endl; + << round(iter->second.op_host_cost_time / iter->second.op_count) << std::endl; } } @@ -379,6 +338,11 @@ void CUPTIAPI ActivityProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *b void GPUProfiler::Init(const std::string &profileDataPath = "") { MS_LOG(INFO) << "Initialize GPU Profiling"; + if (subscriber_ != nullptr) { + StopCUPTI(); + MS_LOG(EXCEPTION) + << "Repeated initialization, Please check whether you have created the Profiler object multiple times"; + } CHECK_CUPTI_RET_WITH_EXCEPT(CuptiSubscribe(&subscriber_, (CUpti_CallbackFunc)CUPTICallBackFunc, this), "CuptiSubscribe"); CHECK_CUPTI_RET_WITH_EXCEPT(CuptiEnableDomain(1, subscriber_, CUPTI_CB_DOMAIN_DRIVER_API), "CuptiEnableDomain"); @@ -516,137 +480,137 @@ void CUPTIAPI ActivityProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *b GPUProfiler::GetInstance()->ProcessBuffer(ctx, streamId, buffer, size, validSize); } -void HandleActivityMemcpyRecord(Event *profillingData, CUpti_Activity *record) { +void HandleActivityMemcpyRecord(Event *profilingData, CUpti_Activity *record) { CUpti_ActivityMemcpy *memcpy = reinterpret_cast(record); switch (memcpy->copyKind) { case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD: - profillingData->activity_type = ActivityType::kMemcpyH2D; - profillingData->kernel_name = "MemcpyH2D"; + profilingData->activity_type = ActivityType::kMemcpyH2D; + profilingData->kernel_name = "MemcpyH2D"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH: - profillingData->activity_type = ActivityType::kMemcpyD2H; - profillingData->kernel_name = "MemcpyD2H"; + profilingData->activity_type = ActivityType::kMemcpyD2H; + profilingData->kernel_name = "MemcpyD2H"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA: - profillingData->activity_type = ActivityType::kMemcpyH2A; - profillingData->kernel_name = "MemcpyH2A"; + profilingData->activity_type = ActivityType::kMemcpyH2A; + profilingData->kernel_name = "MemcpyH2A"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH: - profillingData->activity_type = ActivityType::kMemcpyA2H; - profillingData->kernel_name = "MemcpyA2H"; + profilingData->activity_type = ActivityType::kMemcpyA2H; + profilingData->kernel_name = "MemcpyA2H"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD: - profillingData->activity_type = ActivityType::kMemcpyA2D; - profillingData->kernel_name = "MemcpyA2D"; + profilingData->activity_type = ActivityType::kMemcpyA2D; + profilingData->kernel_name = "MemcpyA2D"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA: - profillingData->activity_type = ActivityType::kMemcpyD2A; - profillingData->kernel_name = "MemcpyD2A"; + profilingData->activity_type = ActivityType::kMemcpyD2A; + profilingData->kernel_name = "MemcpyD2A"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD: - profillingData->activity_type = ActivityType::kMemcpyD2D; - profillingData->kernel_name = "MemcpyD2D"; + profilingData->activity_type = ActivityType::kMemcpyD2D; + profilingData->kernel_name = "MemcpyD2D"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH: - profillingData->activity_type = ActivityType::kMemcpyH2H; - profillingData->kernel_name = "MemcpyH2H"; + profilingData->activity_type = ActivityType::kMemcpyH2H; + profilingData->kernel_name = "MemcpyH2H"; break; case CUPTI_ACTIVITY_MEMCPY_KIND_PTOP: - profillingData->activity_type = ActivityType::kMemcpyP2P; - profillingData->kernel_name = "MemcpyP2P"; + profilingData->activity_type = ActivityType::kMemcpyP2P; + profilingData->kernel_name = "MemcpyP2P"; break; default: - profillingData->activity_type = ActivityType::kMemcpyUnknown; - profillingData->kernel_name = "MemcpyUnknown"; + profilingData->activity_type = ActivityType::kMemcpyUnknown; + profilingData->kernel_name = "MemcpyUnknown"; break; } - profillingData->kernel_type = "cuMemcpy"; - profillingData->api_type = CUPTIApiType::kActivity; - profillingData->start_time_stamp = memcpy->start; - profillingData->end_time_stamp = memcpy->end; - profillingData->device_id = memcpy->deviceId; - profillingData->context_id = memcpy->contextId; - profillingData->stream_id = memcpy->streamId; - profillingData->correlation_id = memcpy->correlationId; - profillingData->memcpy_info.bytes = memcpy->bytes; - profillingData->memcpy_info.src_kind = memcpy->srcKind; - profillingData->memcpy_info.dst_kind = memcpy->dstKind; -} - -void HandleActivityMemcpy2Record(Event *profillingData, CUpti_Activity *record) { + profilingData->kernel_type = "cuMemcpy"; + profilingData->api_type = CUPTIApiType::kActivity; + profilingData->start_time_stamp = memcpy->start; + profilingData->end_time_stamp = memcpy->end; + profilingData->device_id = memcpy->deviceId; + profilingData->context_id = memcpy->contextId; + profilingData->stream_id = memcpy->streamId; + profilingData->correlation_id = memcpy->correlationId; + profilingData->memcpy_info.bytes = memcpy->bytes; + profilingData->memcpy_info.src_kind = memcpy->srcKind; + profilingData->memcpy_info.dst_kind = memcpy->dstKind; +} + +void HandleActivityMemcpy2Record(Event *profilingData, CUpti_Activity *record) { CUpti_ActivityMemcpy2 *memcpyP2P = reinterpret_cast(record); - profillingData->activity_type = ActivityType::kMemcpyP2P; - profillingData->kernel_name = "MemcpyP2P"; - profillingData->kernel_type = "cuMemcpy"; - profillingData->api_type = CUPTIApiType::kActivity; - profillingData->start_time_stamp = memcpyP2P->start; - profillingData->end_time_stamp = memcpyP2P->end; - profillingData->device_id = memcpyP2P->deviceId; - profillingData->context_id = memcpyP2P->contextId; - profillingData->stream_id = memcpyP2P->streamId; - profillingData->correlation_id = memcpyP2P->correlationId; - profillingData->memcpy_info.bytes = memcpyP2P->bytes; - profillingData->memcpy_info.src_kind = memcpyP2P->srcKind; - profillingData->memcpy_info.dst_kind = memcpyP2P->dstKind; -} - -void HandleActivityMemsetRecord(Event *profillingData, CUpti_Activity *record) { + profilingData->activity_type = ActivityType::kMemcpyP2P; + profilingData->kernel_name = "MemcpyP2P"; + profilingData->kernel_type = "cuMemcpy"; + profilingData->api_type = CUPTIApiType::kActivity; + profilingData->start_time_stamp = memcpyP2P->start; + profilingData->end_time_stamp = memcpyP2P->end; + profilingData->device_id = memcpyP2P->deviceId; + profilingData->context_id = memcpyP2P->contextId; + profilingData->stream_id = memcpyP2P->streamId; + profilingData->correlation_id = memcpyP2P->correlationId; + profilingData->memcpy_info.bytes = memcpyP2P->bytes; + profilingData->memcpy_info.src_kind = memcpyP2P->srcKind; + profilingData->memcpy_info.dst_kind = memcpyP2P->dstKind; +} + +void HandleActivityMemsetRecord(Event *profilingData, CUpti_Activity *record) { CUpti_ActivityMemset *memset = reinterpret_cast(record); - profillingData->activity_type = ActivityType::kMemset; - profillingData->kernel_name = "MemorySet"; - profillingData->api_type = CUPTIApiType::kActivity; - profillingData->start_time_stamp = memset->start; - profillingData->end_time_stamp = memset->end; - profillingData->device_id = memset->deviceId; - profillingData->context_id = memset->contextId; - profillingData->stream_id = memset->streamId; - profillingData->correlation_id = memset->correlationId; - profillingData->memcpy_info.bytes = memset->bytes; -} - -void HandleActivityKernelRecord(Event *profillingData, CUpti_Activity *record) { + profilingData->activity_type = ActivityType::kMemset; + profilingData->kernel_name = "MemorySet"; + profilingData->api_type = CUPTIApiType::kActivity; + profilingData->start_time_stamp = memset->start; + profilingData->end_time_stamp = memset->end; + profilingData->device_id = memset->deviceId; + profilingData->context_id = memset->contextId; + profilingData->stream_id = memset->streamId; + profilingData->correlation_id = memset->correlationId; + profilingData->memcpy_info.bytes = memset->bytes; +} + +void HandleActivityKernelRecord(Event *profilingData, CUpti_Activity *record) { CUpti_ActivityKernel4 *kernel = reinterpret_cast(record); - profillingData->activity_type = ActivityType::kKernel; - profillingData->api_type = CUPTIApiType::kActivity; - profillingData->kernel_name = GetKernelFunc(kernel->name); - profillingData->kernel_type = "cuLaunchKernel"; - profillingData->start_time_stamp = kernel->start; - profillingData->end_time_stamp = kernel->end; - profillingData->device_id = kernel->deviceId; - profillingData->context_id = kernel->contextId; - profillingData->stream_id = kernel->streamId; - profillingData->correlation_id = kernel->correlationId; - profillingData->kernel_info.registers_per_thread = kernel->registersPerThread; - profillingData->kernel_info.static_shared_memory = kernel->staticSharedMemory; - profillingData->kernel_info.dynamic_shared_memory = kernel->dynamicSharedMemory; - profillingData->kernel_info.block_x = kernel->blockX; - profillingData->kernel_info.block_y = kernel->blockY; - profillingData->kernel_info.block_z = kernel->blockZ; - profillingData->kernel_info.grid_x = kernel->gridX; - profillingData->kernel_info.grid_y = kernel->gridY; - profillingData->kernel_info.grid_z = kernel->gridZ; + profilingData->activity_type = ActivityType::kKernel; + profilingData->api_type = CUPTIApiType::kActivity; + profilingData->kernel_name = GetKernelFunc(kernel->name); + profilingData->kernel_type = "cuLaunchKernel"; + profilingData->start_time_stamp = kernel->start; + profilingData->end_time_stamp = kernel->end; + profilingData->device_id = kernel->deviceId; + profilingData->context_id = kernel->contextId; + profilingData->stream_id = kernel->streamId; + profilingData->correlation_id = kernel->correlationId; + profilingData->kernel_info.registers_per_thread = kernel->registersPerThread; + profilingData->kernel_info.static_shared_memory = kernel->staticSharedMemory; + profilingData->kernel_info.dynamic_shared_memory = kernel->dynamicSharedMemory; + profilingData->kernel_info.block_x = kernel->blockX; + profilingData->kernel_info.block_y = kernel->blockY; + profilingData->kernel_info.block_z = kernel->blockZ; + profilingData->kernel_info.grid_x = kernel->gridX; + profilingData->kernel_info.grid_y = kernel->gridY; + profilingData->kernel_info.grid_z = kernel->gridZ; } void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) { PROFILER_ERROR_IF_NULLPTR(record); - Event profillingData; - profillingData.cb_id = 0; + Event profilingData; + profilingData.cb_id = 0; switch (record->kind) { case CUPTI_ACTIVITY_KIND_MEMCPY: { - HandleActivityMemcpyRecord(&profillingData, record); + HandleActivityMemcpyRecord(&profilingData, record); break; } case CUPTI_ACTIVITY_KIND_MEMCPY2: { - HandleActivityMemcpy2Record(&profillingData, record); + HandleActivityMemcpy2Record(&profilingData, record); break; } case CUPTI_ACTIVITY_KIND_MEMSET: { - HandleActivityMemsetRecord(&profillingData, record); + HandleActivityMemsetRecord(&profilingData, record); break; } case CUPTI_ACTIVITY_KIND_KERNEL: case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL: { - HandleActivityKernelRecord(&profillingData, record); + HandleActivityKernelRecord(&profilingData, record); break; } default: @@ -654,7 +618,7 @@ void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) { return; } - AddEvent(std::move(profillingData)); + AddEvent(std::move(profilingData)); } void CUPTIAPI GPUProfiler::AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords) { diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h index f99364d853..0b58982071 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h @@ -93,9 +93,6 @@ struct OpInfo { int op_kernel_count = 0; int op_count = 0; void *stream; - - MemcpyInfo memcpy_info = {0}; - KernelInfo kernel_info = {0}; }; struct BaseTime {