diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
index 90e7462124..42f4381d67 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
@@ -54,6 +54,7 @@
 #include "profiler/device/ascend/profiling_context.h"
 #include "profiler/device/ascend/rt_callback_manager.h"
 #include "utils/config_manager.h"
+#include "runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h"
 
 using ge::model_runner::ModelRunner;
 using mindspore::device::ascend::ProfilingManager;
@@ -69,6 +70,7 @@ using std::vector;
 constexpr uint32_t kTupleTaskId = 0;
 constexpr uint32_t kTupleStreamId = 1;
 constexpr uint32_t kTupleArgs = 2;
+constexpr uint32_t kProfilingMaxTaskIdInStream = 65531;
 
 namespace mindspore {
 namespace device {
@@ -216,6 +218,19 @@ void AsyncDataDumpUninit() {
   }
 }
 
+// Hands the (stream_id, task_id) -> op name records gathered by KernelLaunchProfiling() to an
+// OpNameTaskStreamReporter (file name "nonsink"). Only runs when profiling is on in PyNative mode.
+void AscendKernelRuntime::ReportProfilingData() {
+  auto context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context);
+  if (context->get_param<bool>(MS_CTX_ENABLE_PROFILING) &&
+      context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
+    // Save Profiling Framework data
+    OpNameTaskStreamReporter reporter(device_id_, "nonsink", stream_id_task_id_op_name_map_);
+    reporter.ReportData();
+  }
+}
+
 void AscendKernelRuntime::ReleaseDeviceRes() {
   MS_LOG(INFO) << "Ascend finalize start";
 #ifdef ENABLE_DEBUGGER
@@ -228,6 +243,7 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
     return;
   }
   InnerSetContext();
+  ReportProfilingData();
   // release ge runtime
   ClearGraphModelMap();
 
@@ -823,6 +839,36 @@ bool AscendKernelRuntime::GraphWithEmptyTaskList(const session::KernelGraph *gra
 bool AscendKernelRuntime::CheckGraphIdValid(GraphId graph_id) const {
   return task_map_.find(graph_id) != task_map_.end() && graph_model_map_.find(graph_id) != graph_model_map_.end();
 }
+
+// Records kernel_name under the (stream_id, task_id) returned by rtGetTaskIdAndStreamID() so that
+// ReportProfilingData() can report it later. Returns immediately when profiling is disabled.
+// Goes through MS_LOG(EXCEPTION) when the ids cannot be obtained or when more than
+// kProfilingMaxTaskIdInStream records have been collected.
+void AscendKernelRuntime::KernelLaunchProfiling(const std::string &kernel_name) {
+  auto context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context);
+  if (!context->get_param<bool>(MS_CTX_ENABLE_PROFILING)) {
+    return;
+  }
+  // save task info
+  // Start from 0 so the ids are never indeterminate if the call leaves an output unwritten.
+  uint32_t stream_id = 0;
+  uint32_t task_id = 0;
+  auto rt_ret = rtGetTaskIdAndStreamID(&task_id, &stream_id);
+  if (rt_ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "Profiling get task_id stream_id failed";
+  }
+  const std::pair<uint32_t, uint32_t> stream_task_pair{stream_id, task_id};
+  // try_emplace keeps the first name recorded for a key; a repeated key is only logged.
+  auto try_emplace_ret = stream_id_task_id_op_name_map_.try_emplace(stream_task_pair, kernel_name);
+  if (!try_emplace_ret.second) {
+    MS_LOG(WARNING) << "Profiling duplicate key, task_id:" << stream_task_pair.second
+                    << " stream_id:" << stream_task_pair.first << " name:" << kernel_name;
+  }
+  if (stream_id_task_id_op_name_map_.size() > kProfilingMaxTaskIdInStream) {
+    MS_LOG(EXCEPTION) << "Too many profiling data";
+  }
+}
 }  // namespace ascend
 }  // namespace device
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
index 7c18bd9c9e..5622757488 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.h
@@ -18,6 +18,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include "runtime/device/kernel_runtime.h"
@@ -60,6 +62,8 @@ class AscendKernelRuntime : public KernelRuntime {
   bool NodeOutputDeviceAddressExist(const AnfNodePtr &node, size_t index) override;
   bool KernelMemNotReuse(const AnfNodePtr &node) override;
 
+  void KernelLaunchProfiling(const std::string &kernel_name) override;
+
  private:
   bool InitDevice();
   bool ResetDevice();
@@ -76,6 +80,7 @@ class AscendKernelRuntime : public KernelRuntime {
   void LaunchDataDump(GraphId graph_id);
   static void DumpTaskExceptionInfo(const session::KernelGraph *graph);
   static void ExceptionCallback(rtExceptionInfo *exception_info);
+  void ReportProfilingData();
 
   rtContext_t rt_context_{nullptr};
   rtContext_t rt_context_hccl_{nullptr};
@@ -84,6 +89,8 @@ class AscendKernelRuntime : public KernelRuntime {
   unordered_map> graph_model_map_;
   unordered_map> graph_data_dumper_;
   static std::vector exception_infos_;
+  // (stream_id, task_id) -> op name; filled by KernelLaunchProfiling(), read by ReportProfilingData().
+  std::map<std::pair<uint32_t, uint32_t>, std::string> stream_id_task_id_op_name_map_;
 };
 
 MS_REG_KERNEL_RUNTIME(kAscendDevice, AscendKernelRuntime);
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.cc
new file mode 100644
index 0000000000..7a21ea0615
--- /dev/null
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.cc
@@ -0,0 +1,55 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h"
+#include
+#include
+#include
+#include
+#include "runtime/device/ascend/profiling/reporter/task_desc_reporter.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+// Regroups the recorded (stream_id, task_id) -> op name entries by op name, wraps every group in a
+// TaskStreamOpNameDesc appended to prof_desc_list_, and finally calls ReportAllLine().
+void OpNameTaskStreamReporter::ReportData() {
+  MS_LOG(INFO) << "ReportData start";
+
+  // op name -> every (stream_id, task_id) pair recorded for that name. The source std::map is walked
+  // in ascending key order, so each vector is filled in ascending (stream_id, task_id) order.
+  std::map<std::string, std::vector<std::pair<uint32_t, uint32_t>>> op_name_map;
+  for (const auto &iter : stream_id_task_id_op_name_map_) {
+    const auto &stream_task_pair = iter.first;
+    const auto &op_name = iter.second;
+    // operator[] creates the empty vector the first time a name is seen, so a single statement
+    // handles both a new and an already known op name.
+    op_name_map[op_name].emplace_back(stream_task_pair);
+  }
+
+  // One descriptor per op name.
+  for (const auto &iter : op_name_map) {
+    auto desc_ptr = std::make_shared<TaskStreamOpNameDesc>(iter.first, iter.second);
+    prof_desc_list_.emplace_back(desc_ptr);
+  }
+  // NOTE(review): ReportAllLine() is not defined in this file; presumably DescReporter writes
+  // prof_desc_list_ out there - confirm.
+  ReportAllLine();
+  MS_LOG(INFO) << "ReportData end";
+}
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h
new file mode 100644
index 0000000000..6fe142b116
--- /dev/null
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_PROFILING_REPORTER_OP_NAME_TASK_STREAM_REPORTER_H_
+#define MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_PROFILING_REPORTER_OP_NAME_TASK_STREAM_REPORTER_H_
+
+#include
+#include
+#include
+#include "runtime/device/ascend/profiling/reporter/desc_reporter.h"
+
+namespace mindspore {
+namespace device {
+namespace ascend {
+// Reporter that groups recorded (stream_id, task_id) pairs by op name and reports one entry per op.
+// The map is taken by value and moved into the member, so the caller's map is left untouched.
+class OpNameTaskStreamReporter : public DescReporter {
+ public:
+  OpNameTaskStreamReporter(uint32_t device_id, const std::string &file_name,
+                           std::map<std::pair<uint32_t, uint32_t>, std::string> stream_id_task_id_op_name_map)
+      : DescReporter(device_id, file_name), stream_id_task_id_op_name_map_(std::move(stream_id_task_id_op_name_map)) {}
+  ~OpNameTaskStreamReporter() override = default;
+  void ReportData() override;
+
+ private:
+  // (stream_id, task_id) -> name of the op recorded for those ids.
+  std::map<std::pair<uint32_t, uint32_t>, std::string> stream_id_task_id_op_name_map_;
+};
+}  // namespace ascend
+}  // namespace device
+}  // namespace mindspore
+#endif  // MINDSPORE_MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_PROFILING_REPORTER_OP_NAME_TASK_STREAM_REPORTER_H_
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc
index 4aec72472c..a8c33b1cb5 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.cc
@@ -82,6 +82,21 @@ std::string GraphDesc::DataShapeToString(const std::vector &shape) {
   oss << "\"";
   return oss.str();
 }
+
+// Serialises this record as one text line: the op name followed by one " <stream_id>_<task_id>"
+// token per recorded pair, terminated by '\n'.
+std::string TaskStreamOpNameDesc::ToString() {
+  std::string desc = op_name_;
+  // Line format: op_name stream_id_task_id stream_id_task_id ...
+  for (const auto &pair : stream_id_task_id_pairs_) {
+    desc.append(" ");
+    desc.append(std::to_string(pair.first));  // stream_id
+    desc.append("_");
+    desc.append(std::to_string(pair.second));  // task_id
+  }
+  desc.append("\n");
+  return desc;
+}
 }  // namespace ascend
 }  // namespace device
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h
index 9222d9e096..9d867c6903 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/reporter/profiling_desc.h
@@ -81,6 +81,20 @@ class PointDesc : public ProfDesc {
  private:
   uint32_t point_id_;
 };
+
+// Profiling record for one op name: the list of (stream_id, task_id) pairs recorded for it.
+// ToString() renders the record as a single text line.
+class TaskStreamOpNameDesc : public ProfDesc {
+ public:
+  TaskStreamOpNameDesc(std::string op_name, std::vector<std::pair<uint32_t, uint32_t>> stream_id_task_id_pairs)
+      : ProfDesc(std::move(op_name)), stream_id_task_id_pairs_(std::move(stream_id_task_id_pairs)) {}
+
+  ~TaskStreamOpNameDesc() override = default;
+  std::string ToString() override;
+
+ private:
+  std::vector<std::pair<uint32_t, uint32_t>> stream_id_task_id_pairs_;
+};
 }  // namespace ascend
 }  // namespace device
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
index 161d1c3871..5d4f94b7a2 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
@@ -853,6 +853,7 @@ bool KernelRuntime::LaunchKernelMod(const session::KernelGraph &graph) {
         MS_LOG(ERROR) << "Launch kernel failed.";
         return false;
       }
+      KernelLaunchProfiling(kernels[i]->fullname_with_scope());
     }
   }
   return true;
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.h b/mindspore/ccsrc/runtime/device/kernel_runtime.h
index 91387d3a68..212f0b1228 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.h
+++ 
b/mindspore/ccsrc/runtime/device/kernel_runtime.h
@@ -116,6 +116,9 @@ class KernelRuntime {
   void AssignCommunicationNodeInputMem(MemType type, const AnfNodePtr &node);
   void AssignCommunicationNodeMem(MemType type, const AnfNodePtr &node);
 
+  // Profiling hook invoked by LaunchKernelMod() after a kernel launch succeeds; the base version is a no-op.
+  virtual void KernelLaunchProfiling(const std::string &kernel_name) {}
+
  private:
   void AssignStaticMemoryOutput(const session::KernelGraph *graph);
   bool LaunchKernelMod(const session::KernelGraph &graph);
diff --git a/tests/ut/cpp/stub/runtime/runtime_stub.cc b/tests/ut/cpp/stub/runtime/runtime_stub.cc
index 232a34aad6..b6528dc065 100644
--- a/tests/ut/cpp/stub/runtime/runtime_stub.cc
+++ b/tests/ut/cpp/stub/runtime/runtime_stub.cc
@@ -147,4 +147,15 @@ int AdxDataDumpServerInit() { return 0; }
 
 int AdxDataDumpServerUnInit() { return 0; }
 
+// UT stub: reports task 0 on stream 0 and succeeds. Both outputs are written (when non-null) so
+// that callers never read ids that were left unset.
+RTS_API rtError_t rtGetTaskIdAndStreamID(uint32_t *taskid, uint32_t *streamid) {
+  if (taskid != nullptr) {
+    *taskid = 0;
+  }
+  if (streamid != nullptr) {
+    *streamid = 0;
+  }
+  return RT_ERROR_NONE;
+}
+
 RTS_API rtError_t rtSetTaskFailCallback(rtTaskFailCallback callback) {return RT_ERROR_NONE; }
\ No newline at end of file