!2215 Support hccl profiling

Merge pull request !2215 from caifubi/support-hccl-profiling
pull/2215/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 65f2212fd3

@ -125,6 +125,12 @@ bool AscendKernelRuntime::Init() {
}
#endif
// Start up profiling before rtSetDevice
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) {
MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed.";
}
ret = InitDevice();
if (!ret) {
return ret;
@ -133,11 +139,6 @@ bool AscendKernelRuntime::Init() {
MS_EXCEPTION_IF_NULL(mem_manager_);
mem_manager_->MallocDeviceMemory();
ret = ProfilingManager::GetInstance().StartupProfiling(device_id_);
if (!ret) {
MS_EXCEPTION(DeviceProcessError) << "StartupProfiling failed.";
}
initialized_ = true;
return ret;
}

@ -28,6 +28,7 @@
#include "utils/context/ms_context.h"
#include "common/utils.h"
#include "utils/convert_utils.h"
#include "runtime/base.h"
using std::vector;
using Json = nlohmann::json;
@ -159,6 +160,12 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
MS_LOG(INFO) << "profiling config " << cfg;
auto ret = rtProfilerStart();
if (ret != RT_ERROR_NONE) {
MS_LOG(INFO) << "Call rtProfilerStart failed, ret:" << ret;
return false;
}
// call profiling startup API
ProfMgrCfg prof_cfg = {cfg};
prof_handle_ = ProfMgrStartUp(&prof_cfg);
@ -180,6 +187,12 @@ bool ProfilingManager::StopProfiling() {
MS_LOG(INFO) << "report data end, ret = " << reporter->Flush();
}
auto rt_ret = rtProfilerStop();
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Call rtProfilerStop failed";
return false;
}
if (prof_handle_ != nullptr) {
int result = ProfMgrStop(prof_handle_);
if (result != 0) {

@ -133,3 +133,7 @@ rtError_t rtGetStreamId(rtStream_t stream, int32_t *streamId) { return RT_ERROR_
// No-op implementation: both arguments are ignored (the output pointer is
// never written) and RT_ERROR_NONE is returned unconditionally.
rtError_t rtGetFunctionByName(const char *stubName, void **stubFunc) {
  static_cast<void>(stubName);
  static_cast<void>(stubFunc);
  return RT_ERROR_NONE;
}
// No-op implementation: the callback is neither stored nor invoked, and
// RT_ERROR_NONE is returned unconditionally.
rtError_t rtSetTaskGenCallback(rtTaskGenCallback callback) {
  static_cast<void>(callback);
  return RT_ERROR_NONE;
}
// No-op implementation of the profiler start entry point: performs no work
// and returns RT_ERROR_NONE unconditionally.
rtError_t rtProfilerStart(void) {
  return RT_ERROR_NONE;
}
// No-op implementation of the profiler stop entry point: performs no work
// and returns RT_ERROR_NONE unconditionally.
rtError_t rtProfilerStop(void) {
  return RT_ERROR_NONE;
}

Loading…
Cancel
Save