Print an error message if profiling is enabled after HCCL initialization

pull/13031/head
yanghaitao1 4 years ago
parent 359b913d0f
commit c8a4a2e9a5

@ -62,6 +62,7 @@
#include "transform/graph_ir/convert.h"
#include "transform/graph_ir/df_graph_manager.h"
#include "transform/graph_ir/op_adapter_map.h"
#include "runtime/device/ascend/profiling/profiling_manager.h"
#endif
#ifdef ENABLE_DUMP_IR
#include "debug/rdr/running_data_recorder.h"
@ -79,6 +80,10 @@ using mindspore::abstract::AbstractTensorPtr;
using mindspore::abstract::AbstractTuple;
using mindspore::abstract::AbstractTuplePtr;
#if (ENABLE_GE || ENABLE_D)
using mindspore::device::ascend::ProfilingManager;
#endif
const char IR_TYPE_ANF[] = "anf_ir";
const char IR_TYPE_ONNX[] = "onnx_ir";
const char IR_TYPE_MINDIR[] = "mind_ir";
@ -1078,6 +1083,11 @@ void InitHccl() {
(void)context::OpenTsd(ms_context);
}
#endif
#if (ENABLE_GE || ENABLE_D)
if (!ProfilingManager::GetInstance().IsProfiling()) {
ProfilingManager::GetInstance().SetHcclEnabledBefProfilingEnabled();
}
#endif
}
void FinalizeHccl() {

@ -42,7 +42,7 @@ ProfilingManager &ProfilingManager::GetInstance() {
return inst;
}
ProfilingManager::ProfilingManager() : device_id_(0), prof_cb_({0}) {}
// Constructs the manager with device_id_ set to 0, prof_cb_ initialized from {0}, and
// hccl_enabled_bef_profiling_enabled_ set to false (i.e. HCCL has not yet been seen to
// initialize ahead of profiling).
ProfilingManager::ProfilingManager() : device_id_(0), prof_cb_({0}), hccl_enabled_bef_profiling_enabled_(false) {}
uint64_t ProfilingManager::GetJobId() const {
const char *job_id = std::getenv("JOB_ID");
@ -139,6 +139,14 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
MS_LOG(INFO) << "No need profiling. please export PROFILING_MODE and in train mode.";
return true;
}
if (hccl_enabled_bef_profiling_enabled_) {
MS_LOG(ERROR)
<< "Please check the Profiler object initialized before mindspore.context.set_auto_parallel_context() "
"and mindspore.communication.management.init(). Profiler should be initialized before these code.";
return false;
}
device_id_ = device_id;
struct MsprofGeOptions prof_conf = {0};

@ -61,6 +61,7 @@ class ProfilingManager {
// Stores `func` in prof_cb_.msprofReporterCallback, replacing any previously stored value.
void SetMsprofReporterCallback(MsprofReporterCallback func) { prof_cb_.msprofReporterCallback = func; }
// Stores `func` in prof_cb_.msprofSetDeviceCallback, replacing any previously stored value.
void SetMsprofSetDeviceCallback(MsprofSetDeviceCallback func) { prof_cb_.msprofSetDeviceCallback = func; }
Status GetProfConf(NotNull<MsprofGeOptions *> prof);
// Marks that HCCL was initialized before profiling was enabled by setting
// hccl_enabled_bef_profiling_enabled_ to true. InitHccl() calls this when IsProfiling()
// reports false; StartupProfiling() checks the flag and, when it is set, logs an error
// and returns false instead of starting profiling.
void SetHcclEnabledBefProfilingEnabled() { hccl_enabled_bef_profiling_enabled_ = true; }
protected:
ProfilingManager();
@ -70,6 +71,7 @@ class ProfilingManager {
bool ProfStartUp(NotNull<MsprofGeOptions *> prof_conf);
uint32_t device_id_;
MsprofCallback prof_cb_;
bool hccl_enabled_bef_profiling_enabled_;
};
Status RegProfCtrlCallback(MsprofCtrlCallback func);

@ -256,8 +256,9 @@ class Profiler:
"""Collect and analyse gpu performance data"""
if context.get_auto_parallel_context('device_num') > 1 and self._dev_id != str(get_rank()):
self._dev_id = str(get_rank())
logger.error('Please check the Profiler object initialized after set_auto_parallel_context() '
'and init(). Profiler should be initialized after these code. ')
logger.error('Please check the Profiler object initialized after mindspore.context.set_auto_parallel_'
'context() and mindspore.communication.management.init(). Profiler should be initialized'
' after these code.')
self._gpu_profiler.stop()
timeline_generator = self._generate_timeline()

Loading…
Cancel
Save