commit
21014fd624
@ -0,0 +1,5 @@
|
|||||||
|
# The profiler object library is only built when GPU support is enabled.
if(ENABLE_GPU)
    # Collect every .cc file below device/gpu, with paths relative to this directory.
    file(GLOB_RECURSE PROFILER_SRC_LIST
         RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         "device/gpu/*.cc")
    # Define SUBMODULE_ID for each collected source file.
    set_property(SOURCE ${PROFILER_SRC_LIST}
                 PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
    add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
endif()
|
@ -0,0 +1,134 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include <cupti.h>
|
||||||
|
#include <dlfcn.h>
|
||||||
|
#include "utils/log_adapter.h"
|
||||||
|
#include "profiler/device/gpu/cupti_interface.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace profiler {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
inline void *LoadLibrary(const char *name) {
|
||||||
|
auto handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL);
|
||||||
|
if (handle == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Load lib " << name << " Please check whether configured the path of CUPTI to LD_LIBRARY_PATH";
|
||||||
|
}
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void *GetCUPTIHandle() {
|
||||||
|
static void *handle = LoadLibrary("libcupti.so");
|
||||||
|
return handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void *GetCUPTIFunc(const char *name) {
|
||||||
|
static void *handle = GetCUPTIHandle();
|
||||||
|
void *func = dlsym(handle, name);
|
||||||
|
if (func == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Load func " << name << " failed, make sure you have implied it!";
|
||||||
|
}
|
||||||
|
return func;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef CUptiResult (*CuptiSubscribeFunc)(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback,
|
||||||
|
void *userdata);
|
||||||
|
typedef CUptiResult (*CuptiEnableDomainFunc)(uint32_t enable, CUpti_SubscriberHandle subscriber,
|
||||||
|
CUpti_CallbackDomain domain);
|
||||||
|
typedef CUptiResult (*CuptiActivityEnableFunc)(CUpti_ActivityKind kind);
|
||||||
|
typedef CUptiResult (*CuptiActivityRegisterCallbacksFunc)(CUpti_BuffersCallbackRequestFunc funcBufferRequested,
|
||||||
|
CUpti_BuffersCallbackCompleteFunc funcBufferCompleted);
|
||||||
|
typedef CUptiResult (*CuptiUnsubscribeFunc)(CUpti_SubscriberHandle subscriber);
|
||||||
|
typedef CUptiResult (*CuptiActivityFlushAllFunc)(uint32_t flag);
|
||||||
|
typedef CUptiResult (*CuptiActivityDisableFunc)(CUpti_ActivityKind kind);
|
||||||
|
typedef CUptiResult (*CuptiActivityGetNextRecordFunc)(uint8_t *buffer, size_t validBufferSizeBytes,
|
||||||
|
CUpti_Activity **record);
|
||||||
|
typedef CUptiResult (*CuptiActivityGetNumDroppedRecordsFunc)(CUcontext context, uint32_t streamId, size_t *dropped);
|
||||||
|
typedef CUptiResult (*CuptiGetTimestampFunc)(uint64_t *timestamp);
|
||||||
|
typedef CUptiResult (*CuptiGetResultStringFunc)(CUptiResult result, const char **str);
|
||||||
|
typedef CUptiResult (*CuptiGetStreamIdFunc)(CUcontext context, CUstream stream, uint32_t *streamId);
|
||||||
|
typedef CUptiResult (*CuptiGetDeviceIdFunc)(CUcontext context, uint32_t *deviceId);
|
||||||
|
|
||||||
|
// Forwards to the "cuptiSubscribe" symbol. Its address is obtained from GetCUPTIFunc when the
// function-local static is initialised and is reused by later calls.
CUptiResult CuptiSubscribe(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback, void *userdata) {
  static const auto cupti_subscribe = reinterpret_cast<CuptiSubscribeFunc>(GetCUPTIFunc("cuptiSubscribe"));
  return cupti_subscribe(subscriber, callback, userdata);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiEnableDomain" symbol. Its address is obtained from GetCUPTIFunc when the
// function-local static is initialised and is reused by later calls.
CUptiResult CuptiEnableDomain(uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain) {
  static const auto cupti_enable_domain =
    reinterpret_cast<CuptiEnableDomainFunc>(GetCUPTIFunc("cuptiEnableDomain"));
  return cupti_enable_domain(enable, subscriber, domain);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiActivityEnable" symbol. Its address is obtained from GetCUPTIFunc when
// the function-local static is initialised and is reused by later calls.
CUptiResult CuptiActivityEnable(CUpti_ActivityKind kind) {
  static const auto cupti_activity_enable =
    reinterpret_cast<CuptiActivityEnableFunc>(GetCUPTIFunc("cuptiActivityEnable"));
  return cupti_activity_enable(kind);
}
|
||||||
|
|
||||||
|
// Forwards both buffer callbacks to the "cuptiActivityRegisterCallbacks" symbol. Its address is
// obtained from GetCUPTIFunc when the function-local static is initialised and is reused afterwards.
CUptiResult CuptiActivityRegisterCallbacks(CUpti_BuffersCallbackRequestFunc funcBufferRequested,
                                           CUpti_BuffersCallbackCompleteFunc funcBufferCompleted) {
  static const auto cupti_register_callbacks =
    reinterpret_cast<CuptiActivityRegisterCallbacksFunc>(GetCUPTIFunc("cuptiActivityRegisterCallbacks"));
  return cupti_register_callbacks(funcBufferRequested, funcBufferCompleted);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiUnsubscribe" symbol. Its address is obtained from GetCUPTIFunc when the
// function-local static is initialised and is reused by later calls.
CUptiResult CuptiUnsubscribe(CUpti_SubscriberHandle subscriber) {
  static const auto cupti_unsubscribe = reinterpret_cast<CuptiUnsubscribeFunc>(GetCUPTIFunc("cuptiUnsubscribe"));
  return cupti_unsubscribe(subscriber);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiActivityFlushAll" symbol. Its address is obtained from GetCUPTIFunc when
// the function-local static is initialised and is reused by later calls.
CUptiResult CuptiActivityFlushAll(uint32_t flag) {
  static const auto cupti_flush_all =
    reinterpret_cast<CuptiActivityFlushAllFunc>(GetCUPTIFunc("cuptiActivityFlushAll"));
  return cupti_flush_all(flag);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiActivityDisable" symbol. Its address is obtained from GetCUPTIFunc when
// the function-local static is initialised and is reused by later calls.
CUptiResult CuptiActivityDisable(CUpti_ActivityKind kind) {
  static const auto cupti_activity_disable =
    reinterpret_cast<CuptiActivityDisableFunc>(GetCUPTIFunc("cuptiActivityDisable"));
  return cupti_activity_disable(kind);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiActivityGetNextRecord" symbol. Its address is obtained from GetCUPTIFunc
// when the function-local static is initialised and is reused by later calls.
CUptiResult CuptiActivityGetNextRecord(uint8_t *buffer, size_t validBufferSizeBytes, CUpti_Activity **record) {
  static const auto cupti_next_record =
    reinterpret_cast<CuptiActivityGetNextRecordFunc>(GetCUPTIFunc("cuptiActivityGetNextRecord"));
  return cupti_next_record(buffer, validBufferSizeBytes, record);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiActivityGetNumDroppedRecords" symbol. Its address is obtained from
// GetCUPTIFunc when the function-local static is initialised and is reused by later calls.
CUptiResult CuptiActivityGetNumDroppedRecords(CUcontext context, uint32_t streamId, size_t *dropped) {
  static const auto cupti_num_dropped =
    reinterpret_cast<CuptiActivityGetNumDroppedRecordsFunc>(GetCUPTIFunc("cuptiActivityGetNumDroppedRecords"));
  return cupti_num_dropped(context, streamId, dropped);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiGetTimestamp" symbol. Its address is obtained from GetCUPTIFunc when the
// function-local static is initialised and is reused by later calls.
CUptiResult CuptiGetTimestamp(uint64_t *timestamp) {
  static const auto cupti_get_timestamp =
    reinterpret_cast<CuptiGetTimestampFunc>(GetCUPTIFunc("cuptiGetTimestamp"));
  return cupti_get_timestamp(timestamp);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiGetResultString" symbol. Its address is obtained from GetCUPTIFunc when
// the function-local static is initialised and is reused by later calls.
CUptiResult CuptiGetResultString(CUptiResult result, const char **str) {
  static const auto cupti_result_string =
    reinterpret_cast<CuptiGetResultStringFunc>(GetCUPTIFunc("cuptiGetResultString"));
  return cupti_result_string(result, str);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiGetStreamId" symbol. Its address is obtained from GetCUPTIFunc when the
// function-local static is initialised and is reused by later calls.
CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *streamId) {
  static const auto cupti_get_stream_id =
    reinterpret_cast<CuptiGetStreamIdFunc>(GetCUPTIFunc("cuptiGetStreamId"));
  return cupti_get_stream_id(context, stream, streamId);
}
|
||||||
|
|
||||||
|
// Forwards to the "cuptiGetDeviceId" symbol. Its address is obtained from GetCUPTIFunc when the
// function-local static is initialised and is reused by later calls.
//
// The symbol name must match the CuptiGetDeviceIdFunc signature used for the cast: looking up any
// other entry point here (previously "cuptiSubscribe") calls a function with a different parameter
// list through this pointer.
CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId) {
  static auto func_ptr = reinterpret_cast<CuptiGetDeviceIdFunc>(GetCUPTIFunc("cuptiGetDeviceId"));
  return func_ptr(context, deviceId);
}
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace profiler
|
||||||
|
} // namespace mindspore
|
@ -0,0 +1,44 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CUPTI_INTERFACE_H
|
||||||
|
#define MINDSPORE_CUPTI_INTERFACE_H
|
||||||
|
#ifndef FUNC_EXPORT
|
||||||
|
#define FUNC_EXPORT __attribute__((visibility("default")))
|
||||||
|
#endif
|
||||||
|
namespace mindspore {
|
||||||
|
namespace profiler {
|
||||||
|
namespace gpu {
|
||||||
|
CUptiResult CuptiSubscribe(CUpti_SubscriberHandle *subscriber, CUpti_CallbackFunc callback, void *userdata);
|
||||||
|
CUptiResult CuptiEnableDomain(uint32_t enable, CUpti_SubscriberHandle subscriber, CUpti_CallbackDomain domain);
|
||||||
|
CUptiResult CuptiGetStreamId(CUcontext context, CUstream stream, uint32_t *streamId);
|
||||||
|
CUptiResult CuptiGetDeviceId(CUcontext context, uint32_t *deviceId);
|
||||||
|
|
||||||
|
CUptiResult CuptiActivityEnable(CUpti_ActivityKind kind);
|
||||||
|
CUptiResult CuptiActivityRegisterCallbacks(CUpti_BuffersCallbackRequestFunc funcBufferRequested,
|
||||||
|
CUpti_BuffersCallbackCompleteFunc funcBufferCompleted);
|
||||||
|
CUptiResult CuptiUnsubscribe(CUpti_SubscriberHandle subscriber);
|
||||||
|
CUptiResult CuptiActivityFlushAll(uint32_t flag);
|
||||||
|
CUptiResult CuptiActivityDisable(CUpti_ActivityKind kind);
|
||||||
|
CUptiResult CuptiActivityGetNextRecord(uint8_t *buffer, size_t validBufferSizeBytes, CUpti_Activity **record);
|
||||||
|
CUptiResult CuptiActivityGetNumDroppedRecords(CUcontext context, uint32_t streamId, size_t *dropped);
|
||||||
|
CUptiResult CuptiGetTimestamp(uint64_t *timestamp);
|
||||||
|
CUptiResult CuptiGetResultString(CUptiResult result, const char **str);
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace profiler
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_CUPTI_INTERFACE_H
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,174 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_GPU_PROFILING_H
|
||||||
|
#define MINDSPORE_GPU_PROFILING_H
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <cupti.h>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <mutex>
|
||||||
|
#include <memory>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace profiler {
|
||||||
|
namespace gpu {
|
||||||
|
// Discriminator stored in Event::api_type.
enum class CUPTIApiType {
  kCallback = 0,
  kActivity = 1,
};
|
||||||
|
// Classification stored in Event::activity_type. The numeric values are spelled out explicitly
// and must stay as they are.
// NOTE(review): the two-letter suffixes look like source-to-destination memory kinds
// (H = host, D = device, A = array, P2P = peer to peer) — confirm against the producer code.
enum class ActivityType {
  kKernel = 0,
  // Memory copies, by direction.
  kMemcpyH2D = 1,
  kMemcpyD2H = 2,
  kMemcpyH2A = 3,
  kMemcpyA2H = 4,
  kMemcpyA2D = 5,
  kMemcpyD2A = 6,
  kMemcpyD2D = 7,
  kMemcpyP2P = 8,
  kMemcpyH2H = 9,
  kMemset = 10,
  kMemcpyUnknown = 11,
};
|
||||||
|
|
||||||
|
// Plain aggregate describing a memory copy; used as a member of Event (inside its union) and OpInfo.
struct MemcpyInfo {
  size_t bytes;  // byte count
  // NOTE(review): presumably the CUPTI memory-kind codes of the two endpoints — confirm.
  unsigned char src_kind;
  unsigned char dst_kind;
};
|
||||||
|
|
||||||
|
// Plain aggregate with the per-kernel figures; used as a member of Event (inside its union) and OpInfo.
struct KernelInfo {
  // Resource usage.
  uint64_t registers_per_thread;
  uint64_t static_shared_memory;
  uint64_t dynamic_shared_memory;

  // Block extent in x, y and z.
  uint64_t block_x;
  uint64_t block_y;
  uint64_t block_z;

  // Grid extent in x, y and z.
  uint64_t grid_x;
  uint64_t grid_y;
  uint64_t grid_z;
};
|
||||||
|
|
||||||
|
struct Event {
|
||||||
|
std::string kernel_name;
|
||||||
|
std::string kernel_type;
|
||||||
|
CUPTIApiType api_type;
|
||||||
|
ActivityType activity_type;
|
||||||
|
uint64_t start_time_stamp;
|
||||||
|
uint64_t end_time_stamp;
|
||||||
|
std::string op_name;
|
||||||
|
uint32_t device_id;
|
||||||
|
uint32_t correlation_id;
|
||||||
|
uint32_t thread_id;
|
||||||
|
int64_t context_id;
|
||||||
|
uint32_t stream_id;
|
||||||
|
CUpti_CallbackId cb_id;
|
||||||
|
union {
|
||||||
|
MemcpyInfo memcpy_info;
|
||||||
|
KernelInfo kernel_info;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct OpInfo {
|
||||||
|
std::string op_name;
|
||||||
|
float cupti_api_call_time = 0l;
|
||||||
|
float cupti_activity_time = 0l;
|
||||||
|
float op_host_cost_time = 0;
|
||||||
|
int op_kernel_api_count = 0;
|
||||||
|
int op_kernel_count = 0;
|
||||||
|
int op_count = 0;
|
||||||
|
void *stream;
|
||||||
|
|
||||||
|
MemcpyInfo memcpy_info = {0};
|
||||||
|
KernelInfo kernel_info = {0};
|
||||||
|
};
|
||||||
|
|
||||||
|
// Pair of reference timestamps, one for the host and one for the GPU; both default to zero.
struct BaseTime {
  // Both values are in nanoseconds.
  uint64_t host_start_time = 0;
  uint64_t gpu_start_time = 0;
};
|
||||||
|
|
||||||
|
// Scale factor of 1000 between adjacent time units.
// NOTE(review): presumably used to convert the nanosecond timestamps to microseconds — confirm.
const float kTimeUnit = 1000.0f;
|
||||||
|
|
||||||
|
class GPUProfiler {
|
||||||
|
public:
|
||||||
|
static std::shared_ptr<GPUProfiler> GetInstance();
|
||||||
|
~GPUProfiler() { StopCUPTI(); }
|
||||||
|
GPUProfiler(const GPUProfiler &) = delete;
|
||||||
|
GPUProfiler &operator=(const GPUProfiler &) = delete;
|
||||||
|
|
||||||
|
void Init(const std::string &profileDataPath);
|
||||||
|
void Stop();
|
||||||
|
void StopCUPTI();
|
||||||
|
void StepProfilingEnable(const bool enable_flag);
|
||||||
|
void SyncEnable(const bool enable_flag);
|
||||||
|
bool GetEnableFlag() const { return enable_flag_; }
|
||||||
|
bool GetSyncEnableFlag() const { return sync_enable_flag_; }
|
||||||
|
void EventHandleProcess(CUpti_CallbackId cbid, const CUpti_CallbackData *cbdata, const std::string &typestring,
|
||||||
|
uint64_t startTimestamp, uint64_t endTimestamp);
|
||||||
|
void CUPTIAPI AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords);
|
||||||
|
void CUPTIAPI ProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize);
|
||||||
|
void OpDataProducerBegin(const std::string op_name, void *stream);
|
||||||
|
void OpDataProducerEnd();
|
||||||
|
|
||||||
|
private:
|
||||||
|
GPUProfiler() = default;
|
||||||
|
void OpsParser();
|
||||||
|
void EventLog(const Event &event);
|
||||||
|
|
||||||
|
void HandleActivityRecord(CUpti_Activity *record);
|
||||||
|
void AddEvent(Event &&event);
|
||||||
|
void SetRunTimeData(const std::string &op_name, void *stream);
|
||||||
|
void SetRunTimeData(const std::string &op_name, const float time_elapsed);
|
||||||
|
void FixOpNameByCorrelationId(Event *event);
|
||||||
|
|
||||||
|
static std::shared_ptr<GPUProfiler> profiler_inst_;
|
||||||
|
bool enable_flag_ = false;
|
||||||
|
bool sync_enable_flag_ = true;
|
||||||
|
std::unordered_map<std::string, OpInfo> op_info_map_;
|
||||||
|
std::unordered_map<uint32_t, std::string> op_name_map_;
|
||||||
|
std::vector<Event> events_;
|
||||||
|
BaseTime base_time_;
|
||||||
|
std::string op_name_;
|
||||||
|
void *stream_;
|
||||||
|
void SaveProfileData();
|
||||||
|
std::mutex event_mutex_;
|
||||||
|
|
||||||
|
std::vector<CUpti_ActivityKind> activities_enable_;
|
||||||
|
|
||||||
|
uint64_t cupti_callback_events_count_ = 0l;
|
||||||
|
uint64_t cupti_callback_events_drop_count_ = 0l;
|
||||||
|
uint64_t max_cupti_callback_events_ = 2 * 1024 * 10000;
|
||||||
|
|
||||||
|
uint64_t cupti_activity_events_count_ = 0l;
|
||||||
|
uint64_t cupti_activity_events_drop_count_ = 0l;
|
||||||
|
uint64_t max_cupti_activity_events_ = 2 * 1024 * 10000;
|
||||||
|
|
||||||
|
CUpti_SubscriberHandle subscriber_ = nullptr;
|
||||||
|
cudaEvent_t op_event_start_;
|
||||||
|
cudaEvent_t op_event_stop_;
|
||||||
|
uint64_t op_host_time_start_;
|
||||||
|
uint64_t op_host_time_stop_;
|
||||||
|
std::string profile_data_path_;
|
||||||
|
};
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace profiler
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_GPU_PROFILING_H
|
Loading…
Reference in new issue