From 94a455dacef300853bcfce266b05710aceabe8ff Mon Sep 17 00:00:00 2001 From: caifubi Date: Tue, 21 Apr 2020 21:35:08 +0800 Subject: [PATCH] insert profiling kernel for hccl automatically --- .../ascend/profiling/profiling_utils.cc | 66 +++++++++++++++++-- .../device/ascend/profiling/profiling_utils.h | 6 +- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc index aa71aa0566..7960a08938 100644 --- a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.cc @@ -39,13 +39,9 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNullexecution_order(); ProfilingTraceInfo profiling_trace; profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order); - profiling_trace.trace_bp_end = GetTraceBpEnd(); + profiling_trace.trace_bp_end = GetTraceBpEnd(cnode_exec_order); profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order); - MS_LOG(INFO) << "[profiling] trace_begin:" << profiling_trace.trace_begin - << " trace_bp_end:" << profiling_trace.trace_bp_end - << " trace_netoutput:" << profiling_trace.trace_netoutput; - for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) { std::string env_str = std::string(kCustomNode) + std::to_string(i); const char *node_full_name = std::getenv(env_str.c_str()); @@ -56,9 +52,25 @@ ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull &cnode_exec_order, + NotNull profiling_trace) { + for (const auto &node : cnode_exec_order) { + if (AnfAlgo::IsCommunicationOp(node)) { + MS_EXCEPTION_IF_NULL(node); + profiling_trace->trace_custom_node.insert(node->fullname_with_scope()); + MS_LOG(INFO) << "[profiling]Get hccl node:" << node->fullname_with_scope(); + } + } +} + std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exec_order) { const char *trace_begin = std::getenv(kFpStartNode); auto &first_cnode = 
cnode_exec_order.front(); @@ -66,9 +78,45 @@ std::string ProfilingUtils::GetTraceBegin(const std::vector &cnode_exe return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin); } -std::string ProfilingUtils::GetTraceBpEnd() { +std::string ProfilingUtils::GetTraceBpEnd(const std::vector &cnode_exec_order) { const char *trace_bp_end = std::getenv(kBpEndNode); - return trace_bp_end == nullptr ? "" : std::string(trace_bp_end); + + if (trace_bp_end != nullptr) { + return std::string(trace_bp_end); + } + std::string bp_end_str = ""; + // Contain hccl kernel + auto iter = cnode_exec_order.rbegin(); + while (iter != cnode_exec_order.rend()) { + if (AnfAlgo::IsCommunicationOp(*iter)) { + // store communication op input nodes' name + std::set ar_input_node_names; + for (size_t i = 0; i < AnfAlgo::GetInputTensorNum(*iter); ++i) { + auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(*iter, i); + auto input_node = input_node_with_index.first; + ar_input_node_names.insert(input_node->fullname_with_scope()); + } + // start from previous node + ++iter; + // find input names in previous node + while (iter != cnode_exec_order.rend()) { + if (ar_input_node_names.find((*iter)->fullname_with_scope()) != ar_input_node_names.end()) { + bp_end_str = (*iter)->fullname_with_scope(); + break; + } + ++iter; + } + break; + } + ++iter; + } + + if (bp_end_str.empty()) { + auto last_cnode = cnode_exec_order.back(); + MS_EXCEPTION_IF_NULL(last_cnode); + bp_end_str = last_cnode->fullname_with_scope(); + } + return bp_end_str; } std::string ProfilingUtils::GetTraceNetoutput(const std::vector &cnode_exec_order) { @@ -109,6 +157,7 @@ void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node NotNull graph_ptr, NotNull *> kernel_list) { if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) { + MS_LOG(INFO) << "Profiling Match FpStart:" << profiling_trace_info.trace_begin; auto job_id = 
ProfilingManager::GetInstance().GetJobId(); ProfilingContent job_profiling_context = {false, job_id, 0}; auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr); @@ -137,6 +186,7 @@ void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const Profili if (iter == profiling_trace_info.trace_custom_node.end()) { return; } + MS_LOG(INFO) << "Profiling Match CustomOp:" << anf_node->fullname_with_scope(); // custom op profiling job start from 3. ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0}; CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr); @@ -153,6 +203,7 @@ void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const Profi NotNull *> kernel_list) { MS_EXCEPTION_IF_NULL(anf_node); if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) { + MS_LOG(INFO) << "Profiling Match BpEnd:" << profiling_trace_info.trace_bp_end; ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0}; CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); kernel_list->emplace_back(bp_end_node); @@ -165,6 +216,7 @@ void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const Profili MS_EXCEPTION_IF_NULL(anf_node); auto full_scope_name = anf_node->fullname_with_scope(); if (profiling_trace_info.trace_netoutput == full_scope_name) { + MS_LOG(INFO) << "Profiling Match IterEnd:" << profiling_trace_info.trace_netoutput; ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0}; CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr); kernel_list->emplace_back(bp_kernel_ptr); diff --git a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h index c59e856249..f9f08c9d3f 100644 --- 
a/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h +++ b/mindspore/ccsrc/device/ascend/profiling/profiling_utils.h @@ -43,7 +43,7 @@ struct ProfilingTraceInfo { // 3. insert profiling_trace_bp_end. // 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty. - bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); } + bool IsValid() const { return !(trace_begin.empty() || trace_netoutput.empty()); } }; struct ProfilingContent { @@ -109,8 +109,10 @@ class ProfilingUtils { static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content, NotNull graph_ptr); static std::string GetTraceBegin(const std::vector &cnode_exec_order); - static std::string GetTraceBpEnd(); + static std::string GetTraceBpEnd(const std::vector &cnode_exec_order); static std::string GetTraceNetoutput(const std::vector &cnode_exec_order); + static void GetTraceHccl(const std::vector &cnode_exec_order, + NotNull profiling_trace); // graph id --> (kernel name list) static std::unordered_map> graph_kernel_name_;