diff --git a/mindspore/ccsrc/profiler/CMakeLists.txt b/mindspore/ccsrc/profiler/CMakeLists.txt
index 852fc6efe5..cd0813d1fa 100644
--- a/mindspore/ccsrc/profiler/CMakeLists.txt
+++ b/mindspore/ccsrc/profiler/CMakeLists.txt
@@ -1,14 +1,23 @@
 if(ENABLE_GPU)
-    file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/gpu/*.cc")
+    file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+            "device/gpu/*.cc" "device/cpu/*.cc")
     set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
                  SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
     add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
 endif()
 
 if(ENABLE_D)
-    file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/ascend/*.cc" "device/common/*.cc")
+    file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
+            "device/common/*.cc" "device/ascend/*.cc" "device/cpu/*.cc")
     set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
                  SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
     add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
     add_dependencies(_mindspore_profiler_obj mindspore::protobuf)
 endif()
+
+if(ENABLE_CPU AND NOT (ENABLE_D OR ENABLE_GPU))
+    file(GLOB_RECURSE PROFILER_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "device/cpu/*.cc")
+    set_property(SOURCE ${PROFILER_SRC_LIST} PROPERTY COMPILE_DEFINITIONS
+            SUBMODULE_ID=mindspore::SubModuleId::SM_PROFILER)
+    add_library(_mindspore_profiler_obj OBJECT ${PROFILER_SRC_LIST})
+endif()
\ No newline at end of file
diff --git a/mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.cc b/mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.cc
new file mode 100644
index 0000000000..5a25b288e6
--- /dev/null
+++ b/mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.cc
@@ -0,0 +1,174 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "profiler/device/cpu/cpu_data_saver.h"
+#include <fstream>
+#include <numeric>
+#include "sys/stat.h"
+#include "utils/log_adapter.h"
+#include "utils/ms_utils.h"
+#include "utils/ms_context.h"
+
+namespace mindspore {
+namespace profiler {
+namespace cpu {
+OpDetailInfo::OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion)
+    : op_info_(op_info), proportion_(proportion) {
+  // op_full_name is like 'xxx/xxx/{op_type}-op{node_id}'
+  op_full_name_ = op_info->op_name;
+  auto op_type_begin_iter = op_full_name_.rfind('/') + 1;
+  auto op_type_end_iter = op_full_name_.rfind('-');
+  op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
+  op_name_ = op_full_name_.substr(op_type_begin_iter);
+  op_avg_time_ = op_info->op_cost_time / op_info->op_count;
+}
+
+void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
+  const float factor_percent = 100;
+  op_detail_infos_.reserve(op_info_maps.size());
+  float total_time_sum = GetTotalOpTime(op_info_maps);
+  for (auto item : op_info_maps) {
+    op_timestamps_map_[item.first] = item.second.start_duration;
+    float proportion = item.second.op_cost_time / total_time_sum * factor_percent;
+    auto op_info = std::make_shared<OpInfo>(item.second);
+    OpDetailInfo op_detail_info = OpDetailInfo(op_info, proportion);
+    op_detail_infos_.emplace_back(op_detail_info);
+    AddOpDetailInfoForType(op_detail_info);
+  }
+  // update average time of op type
+  for (auto &op_type : op_type_infos_) {
+    // device_infos:
+    op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
+  }
+  MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
+  MS_LOG(DEBUG) << "Get " << op_type_infos_.size() << " operation type items.";
+}
+
+void DataSaver::AddOpDetailInfoForType(const OpDetailInfo &op_detail_info) {
+  // Construct OpType object according to op detail info
+  OpType op_type = OpType{op_detail_info.op_type_,
+                          op_detail_info.op_info_->op_count,
+                          op_detail_info.op_info_->op_count,
+                          op_detail_info.op_info_->op_cost_time,
+                          0,
+                          op_detail_info.proportion_};
+  // Set the OpType into op_type_infos_ map
+  std::string type_name = op_detail_info.op_type_;
+  auto iter = op_type_infos_.find(type_name);
+  if (iter == op_type_infos_.end()) {
+    op_type_infos_.emplace(type_name, op_type);
+  } else {
+    iter->second += op_type;
+  }
+}
+
+float DataSaver::GetTotalOpTime(const OpInfoMap &op_info_maps) {
+  float sum = 0;
+  sum = std::accumulate(op_info_maps.begin(), op_info_maps.end(), sum,
+                        [](float i, auto iter) { return i + iter.second.op_cost_time; });
+  MS_LOG(DEBUG) << "The total op time is " << sum;
+  return sum;
+}
+
+void DataSaver::WriteFile(std::string out_path_dir) {
+  if (op_detail_infos_.empty() || op_type_infos_.empty()) {
+    MS_LOG(INFO) << "No cpu operation detail infos to write.";
+    return;
+  }
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
+  device_id_ = std::to_string(device_id);
+  WriteOpDetail(out_path_dir);
+  WriteOpType(out_path_dir);
+  WriteOpTimestamp(out_path_dir);
+}
+
+void DataSaver::WriteOpType(const std::string &saver_base_dir) {
+  std::string file_path = saver_base_dir + "/cpu_op_type_info_" + device_id_ + ".csv";
+  std::ofstream ofs(file_path);
+  // check if the file is writable
+  if (!ofs.is_open()) {
+    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
+    return;
+  }
+  try {
+    // write op type info into file
+    ofs << OpType().GetHeader() << std::endl;
+    for (auto op_type_info : op_type_infos_) {
+      ofs << op_type_info.second << std::endl;
+    }
+  } catch (const std::exception &e) {
+    MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what();
+  }
+  ofs.close();
+  ChangeFileMode(file_path);
+  MS_LOG(INFO) << "Write " << op_type_infos_.size() << " op type infos into file: " << file_path;
+}
+
+void DataSaver::WriteOpDetail(const std::string &saver_base_dir) {
+  std::string file_path = saver_base_dir + "/cpu_op_detail_info_" + device_id_ + ".csv";
+  std::ofstream ofs(file_path);
+  if (!ofs.is_open()) {
+    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
+    return;
+  }
+  try {
+    // write op detail info into file
+    ofs << OpDetailInfo().GetHeader() << std::endl;
+    for (auto op_detail : op_detail_infos_) {
+      ofs << op_detail << std::endl;
+    }
+  } catch (const std::exception &e) {
+    MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what();
+  }
+  ofs.close();
+  ChangeFileMode(file_path);
+  MS_LOG(INFO) << "Write " << op_detail_infos_.size() << " op detail infos into file: " << file_path;
+}
+
+void DataSaver::WriteOpTimestamp(const std::string &saver_base_dir) {
+  std::string file_path = saver_base_dir + "/cpu_op_execute_timestamp_" + device_id_ + ".txt";
+  std::ofstream ofs(file_path);
+  // check if the file is writable
+  if (!ofs.is_open()) {
+    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
+    return;
+  }
+  try {
+    // write op timestamp info into file
+    for (const auto &op_timestamp_info : op_timestamps_map_) {
+      ofs << op_timestamp_info.first << ";host_cpu_ops;";
+      for (auto start_end : op_timestamp_info.second) {
+        ofs << start_end.start_timestamp << "," << start_end.duration << " ";
+      }
+      ofs << std::endl;
+    }
+  } catch (const std::exception &e) {
+    MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what();
+  }
+  ofs.close();
+  ChangeFileMode(file_path);
+}
+
+void DataSaver::ChangeFileMode(const std::string &file_path) {
+  if (chmod(common::SafeCStr(file_path), S_IRUSR) == -1) {
+    MS_LOG(WARNING) << "Modify file: " << file_path << " to rw fail.";
+    return;
+  }
+}
+}  // namespace cpu
+}  // namespace profiler
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.h b/mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.h
new file mode 100644
index 0000000000..0cfbf44e1c
--- /dev/null
+++ b/mindspore/ccsrc/profiler/device/cpu/cpu_data_saver.h
@@ -0,0 +1,123 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CPU_DATA_SAVER_H
+#define MINDSPORE_CPU_DATA_SAVER_H
+#include <iostream>
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "profiler/device/cpu/cpu_profiling.h"
+namespace mindspore {
+namespace profiler {
+namespace cpu {
+struct OpDetailInfo {
+  std::string op_type_;
+  std::string op_name_;
+  std::string op_full_name_;
+  std::shared_ptr<OpInfo> op_info_{nullptr};
+  float op_avg_time_{0};
+  float proportion_{0};
+
+  OpDetailInfo() = default;
+
+  OpDetailInfo(std::shared_ptr<OpInfo> op_info, float proportion);
+
+  std::string GetHeader() const {
+    return "op_side,op_type,op_name,full_op_name,op_occurrences,compute_time(ms),"
+           "avg_execution_time(ms),total_proportion,subgraph,pid";
+  }
+
+  friend std::ostream &operator<<(std::ostream &os, const OpDetailInfo &event) {
+    os << "Host," << event.op_type_ << ',' << event.op_name_ << ',' << event.op_full_name_ << ','
+       << event.op_info_->op_count << ',' << event.op_info_->op_cost_time << ',' << event.op_avg_time_ << ','
+       << event.proportion_ << ",Default," << event.op_info_->pid;
+    return os;
+  }
+};
+
+struct OpType {
+  std::string op_type_;
+  int count_{0};
+  int step_{0};
+  float total_time_{0};
+  float avg_time_{0};
+  float proportion_{0};
+
+  std::string GetHeader() const {
+    return "op_type,total_called_times,called_times(per-step),"
+           "total_compute_time,compute_time(ms per-step),percent";
+  }
+
+  friend std::ostream &operator<<(std::ostream &os, const OpType &event) {
+    os << event.op_type_ << ',' << event.count_ << ',' << event.count_ / event.step_ << ',' << event.total_time_ << ','
+       << event.total_time_ / event.step_ << ',' << event.proportion_;
+    return os;
+  }
+
+  OpType &operator+=(const OpType &other) {
+    this->count_ += other.count_;
+    this->total_time_ += other.total_time_;
+    this->proportion_ += other.proportion_;
+    return *this;
+  }
+};
+
+using OpInfoMap = std::unordered_map<std::string, OpInfo>;
+using OpTypeInfos = std::unordered_map<std::string, OpType>;  // <op_type_name, OpType>
+using OpDetailInfos = std::vector<OpDetailInfo>;
+// <op_full_name, StartDuration>
+using OpTimestampInfo = std::unordered_map<std::string, std::vector<StartDuration>>;
+
+class DataSaver {
+ public:
+  DataSaver() = default;
+
+  ~DataSaver() = default;
+
+  DataSaver(const DataSaver &) = delete;
+
+  DataSaver &operator=(const DataSaver &) = delete;
+
+  void ParseOpInfo(const OpInfoMap &op_info_maps);
+
+  void WriteFile(std::string out_path);
+
+ private:
+  void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
+
+  float GetTotalOpTime(const OpInfoMap &op_info_maps);
+
+  void WriteOpType(const std::string &saver_base_dir);
+
+  void WriteOpDetail(const std::string &saver_base_dir);
+
+  void WriteOpTimestamp(const std::string &saver_base_dir);
+
+  void ChangeFileMode(const std::string &file_path);
+
+  std::string device_id_;
+  OpTypeInfos op_type_infos_;
+  OpDetailInfos op_detail_infos_;
+  OpTimestampInfo op_timestamps_map_;
+};
+}  // namespace cpu
+}  // namespace profiler
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CPU_DATA_SAVER_H
diff --git a/mindspore/ccsrc/profiler/device/cpu/cpu_profiling.cc b/mindspore/ccsrc/profiler/device/cpu/cpu_profiling.cc
new file mode 100644
index 0000000000..b6a698e4d5
--- /dev/null
+++ b/mindspore/ccsrc/profiler/device/cpu/cpu_profiling.cc
@@ -0,0 +1,136 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiler/device/cpu/cpu_profiling.h"
+
+#include <ctime>
+#include <memory>
+#include <string>
+#include "profiler/device/cpu/cpu_data_saver.h"
+#include "pybind_api/api_register.h"
+#include "utils/log_adapter.h"
+#include "utils/utils.h"
+
+namespace mindspore {
+namespace profiler {
+namespace cpu {
+std::shared_ptr<CPUProfiler> CPUProfiler::profiler_inst_ = nullptr;
+
+uint64_t GetMonoTimeStamp() {
+  struct timespec ts;
+#if defined(_WIN32) || defined(_WIN64)
+  clock_gettime(CLOCK_MONOTONIC, &ts);
+#else
+  clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+#endif
+  constexpr uint64_t kNSecondInSecond = 1000000000;
+  uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec;
+  return cur_time_stamp;
+}
+
+std::shared_ptr<CPUProfiler> CPUProfiler::GetInstance() {
+  if (profiler_inst_ == nullptr) {
+    profiler_inst_ = std::shared_ptr<CPUProfiler>(new (std::nothrow) CPUProfiler());
+  }
+  return profiler_inst_;
+}
+
+void CPUProfiler::Init(const std::string &profileDataPath = "") {
+  MS_LOG(INFO) << "Initialize CPU Profiling";
+  base_time_ = GetMonoTimeStamp();
+  profile_data_path_ = profileDataPath;
+  MS_LOG(INFO) << " Host start time(ns): " << base_time_ << " profile data path: " << profile_data_path_;
+}
+
+void CPUProfiler::StepProfilingEnable(const bool enable_flag) {
+  MS_LOG(INFO) << "CPU Profiler enable flag: " << enable_flag;
+  enable_flag_ = enable_flag;
+}
+
+void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint32_t pid) {
+  auto iter = op_info_map_.find(op_name);
+  if (iter != op_info_map_.end()) {
+    iter->second.op_count += 1;
+  } else {
+    OpInfo op_info;
+    op_info.op_name = op_name;
+    op_info.pid = pid;
+    op_info.op_count = 1;
+    op_info_map_[op_name] = op_info;
+  }
+  op_name_ = op_name;
+  pid_ = pid;
+}
+
+void CPUProfiler::SetRunTimeData(const std::string &op_name, const float time_elapsed) {
+  auto iter = op_info_map_.find(op_name);
+  if (iter != op_info_map_.end()) {
+    // accumulate the op execution time (us)
+    iter->second.op_cost_time += time_elapsed;
+  }
+}
+
+void CPUProfiler::SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration) {
+  auto iter = op_info_map_.find(op_name);
+  if (iter != op_info_map_.end()) {
+    iter->second.start_duration.emplace_back(StartDuration({start, duration}));
+  }
+}
+
+void CPUProfiler::OpDataProducerBegin(const std::string op_name, const uint32_t pid) {
+  op_time_start_ = GetMonoTimeStamp();
+  op_time_mono_start_ = GetMonoTimeStamp();
+  SetRunTimeData(op_name, pid);
+}
+
+void CPUProfiler::OpDataProducerEnd() {
+  float op_time_elapsed = 0;
+  op_time_stop_ = GetMonoTimeStamp();
+  op_time_elapsed = (op_time_stop_ - op_time_start_) / kTimeUnit;
+  MS_LOG(DEBUG) << "Host Time Elapsed(us)," << op_name_ << "," << op_time_elapsed;
+  SetRunTimeData(op_name_, op_time_elapsed);
+  SetRunTimeData(op_name_, op_time_mono_start_, op_time_elapsed);
+}
+
+void CPUProfiler::Stop() {
+  MS_LOG(INFO) << "Stop CPU Profiling";
+  SaveProfileData();
+  ClearInst();
+}
+
+void CPUProfiler::SaveProfileData() {
+  if (profile_data_path_.empty()) {
+    MS_LOG(WARNING) << "Profile data path is empty, skip save profile data.";
+  } else {
+    DataSaver dataSaver;
+    dataSaver.ParseOpInfo(op_info_map_);
+    dataSaver.WriteFile(profile_data_path_);
+  }
+}
+
+void CPUProfiler::ClearInst() { op_info_map_.clear(); }
+
+REGISTER_PYBIND_DEFINE(CPUProfiler_, ([](const py::module *m) {
+                         (void)py::class_<CPUProfiler, std::shared_ptr<CPUProfiler>>(*m, "CPUProfiler")
+                           .def_static("get_instance", &CPUProfiler::GetInstance, "CPUProfiler get_instance.")
+                           .def("init", &CPUProfiler::Init, py::arg("profile_data_path"), "init")
+                           .def("stop", &CPUProfiler::Stop, "stop")
+                           .def("step_profiling_enable", &CPUProfiler::StepProfilingEnable, py::arg("enable_flag"),
+                                "enable or disable step profiling");
+                       }));
+}  // namespace cpu
+}  // namespace profiler
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/profiler/device/cpu/cpu_profiling.h b/mindspore/ccsrc/profiler/device/cpu/cpu_profiling.h
new file mode 100644
index 0000000000..de2057f03a
--- /dev/null
+++ b/mindspore/ccsrc/profiler/device/cpu/cpu_profiling.h
@@ -0,0 +1,86 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CPU_PROFILING_H
+#define MINDSPORE_CPU_PROFILING_H
+#include <algorithm>
+#include <cstdint>
+#include <cstdio>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+namespace mindspore {
+namespace profiler {
+namespace cpu {
+struct StartDuration {
+  uint64_t start_timestamp = 0l;
+  float duration = 0l;
+};
+
+struct OpInfo {
+  std::string op_name;
+  float op_cost_time = 0;
+  int op_count = 0;
+  std::vector<StartDuration> start_duration;
+  uint32_t pid;
+};
+
+const float kTimeUnit = 1000;
+
+class CPUProfiler {
+ public:
+  static std::shared_ptr<CPUProfiler> GetInstance();
+  ~CPUProfiler() = default;
+  CPUProfiler(const CPUProfiler &) = delete;
+  CPUProfiler &operator=(const CPUProfiler &) = delete;
+
+  void Init(const std::string &profileDataPath);
+  void Stop();
+  void StepProfilingEnable(const bool enable_flag);
+  bool GetEnableFlag() const { return enable_flag_; }
+  void OpDataProducerBegin(const std::string op_name, const uint32_t pid);
+  void OpDataProducerEnd();
+  std::string ProfileDataPath() const { return profile_data_path_; }
+
+ private:
+  CPUProfiler() = default;
+  void ClearInst();
+  void SetRunTimeData(const std::string &op_name, const uint32_t pid);
+  void SetRunTimeData(const std::string &op_name, const float time_elapsed);
+  void SetRunTimeData(const std::string &op_name, const uint64_t start, const float duration);
+
+  static std::shared_ptr<CPUProfiler> profiler_inst_;
+  bool enable_flag_ = false;
+  std::unordered_map<std::string, OpInfo> op_info_map_;
+  uint64_t base_time_;
+  std::string op_name_;
+  uint32_t pid_;
+  void SaveProfileData();
+
+  uint64_t op_time_start_;
+  uint64_t op_time_mono_start_;
+  uint64_t op_time_stop_;
+  std::string profile_data_path_;
+};
+}  // namespace cpu
+}  // namespace profiler
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CPU_PROFILING_H
diff --git a/mindspore/ccsrc/profiler/device/gpu/data_saver.cc b/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
index 5703c9e0e8..7c0122999a 100644
--- a/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
@@ -153,7 +153,7 @@ void DataSaver::AddKernelEventToDevice(const Event &event, DeviceActivityInfos *
   }
 }
-void DataSaver::WriteFile(std::string out_path_dir) {
+void DataSaver::WriteFile(std::string out_path_dir, const BaseTime &start_time) {
   if (out_path_dir.empty()) {
     MS_LOG(WARNING) << "Output directory. Ignore the writing data.";
     return;
   }
@@ -169,6 +169,7 @@ void DataSaver::WriteFile(std::string out_path_dir) {
   WriteActivity(out_path_dir);
   WriteOpTimestamp(out_path_dir);
   WriteStepTrace(out_path_dir);
+  WriteStartTime(out_path_dir, start_time);
 }
 
 void DataSaver::WriteOpType(const std::string &saver_base_dir) {
@@ -307,6 +308,28 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) {
   MS_LOG(INFO) << "Write step trace infos into file: " << file_path;
 }
 
+void DataSaver::WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time) {
+  std::string file_path = saver_base_dir + "/start_time_" + device_id_ + ".txt";
+  std::ofstream ofs(file_path);
+  // check if the file is writable
+  if (!ofs.is_open()) {
+    MS_LOG(WARNING) << "Open file '" << file_path << "' failed!";
+    return;
+  }
+
+  // write start time info into file
+  try {
+    ofs << "host_monotonic_raw_time(ns): " << start_time.host_start_monotonic_raw_time << std::endl;
+    ofs << "gpu_start_time(ns): " << start_time.gpu_start_time << std::endl;
+  } catch (const std::exception &e) {
+    MS_LOG(ERROR) << "Write " << file_path << " failed: " << e.what();
+  }
+
+  ofs.close();
+  ChangeFileMode(file_path);
+  MS_LOG(INFO) << "Write profiler start time infos into file: " << file_path;
+}
+
 void DataSaver::SetStepTraceOpName(ProfilingTraceInfo trace_op_name) { step_trace_op_name = trace_op_name; }
 
 void DataSaver::ChangeFileMode(const std::string &file_path) {
diff --git a/mindspore/ccsrc/profiler/device/gpu/data_saver.h b/mindspore/ccsrc/profiler/device/gpu/data_saver.h
index f28f8e0c39..da1e65fb96 100644
--- a/mindspore/ccsrc/profiler/device/gpu/data_saver.h
+++ b/mindspore/ccsrc/profiler/device/gpu/data_saver.h
@@ -129,7 +129,7 @@ class DataSaver {
 
   void ParseEvent(const std::vector<Event> &events);
 
-  void WriteFile(std::string out_path);
+  void WriteFile(std::string out_path, const BaseTime &start_time);
 
  private:
   void AddOpDetailInfoForType(const OpDetailInfo &op_detail_info);
@@ -150,6 +150,8 @@
   void WriteStepTrace(const std::string &saver_base_dir);
 
+  void WriteStartTime(const std::string &saver_base_dir, const BaseTime &start_time);
+
   void ChangeFileMode(const std::string &file_path);
 
   std::string device_id_;
diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
index 9e94512d78..5e5adbca5c 100644
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 #include "profiler/device/gpu/gpu_profiling.h"
 
+#include <ctime>
 #include <cxxabi.h>
 #include <chrono>
 #include <cmath>
@@ -91,6 +92,14 @@ uint64_t GetHostTimeStamp() {
   return cur_time_stamp;
 }
 
+uint64_t GetHostMonoTimeStamp() {
+  struct timespec ts;
+  clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+  constexpr uint64_t kNSecondInSecond = 1000000000;
+  uint64_t cur_time_stamp = ts.tv_sec * kNSecondInSecond + ts.tv_nsec;
+  return cur_time_stamp;
+}
+
 std::string GetKernelFunc(const char *name) {
   char *demangledName = abi::__cxa_demangle(name, nullptr, nullptr, nullptr);
   if (demangledName != nullptr) {
@@ -100,16 +109,24 @@ std::string GetKernelFunc(const char *name) {
   }
 }
 
-void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_CallbackId cb_id,
-                  const CUpti_CallbackData *cb_data) {
-  uint64_t start_timestamp = *cb_data->correlationData;
-  uint64_t end_timestamp = GetCUPTITimeStamp();
+bool IsMemcpyAsyncEvent(CUpti_CallbackId cb_id) {
+  switch (cb_id) {
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2:
+    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync:
+      return true;
+  }
+  return false;
+}
+
+bool IsMemcpySyncEvent(CUpti_CallbackId cb_id) {
   switch (cb_id) {
-    case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel:
-    case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel:
-    case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice:
-      gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuLaunchKernel", start_timestamp, end_timestamp);
-      break;
     case CUPTI_DRIVER_TRACE_CBID_cuMemcpy:
     case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoD_v2:
    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoH_v2:
@@ -122,17 +139,21 @@ void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_C
     case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DUnaligned_v2:
     case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3D_v2:
     case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoA_v2:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAsync:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoDAsync_v2:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoHAsync_v2:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyDtoDAsync_v2:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyAtoHAsync_v2:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpy2DAsync_v2:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpy3DAsync_v2:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyHtoAAsync_v2:
     case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeer:
-    case CUPTI_DRIVER_TRACE_CBID_cuMemcpyPeerAsync:
-      gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuMemcpy", start_timestamp, end_timestamp);
+      return true;
+  }
+  return false;
+}
+
+void CUPTIApiExit(const std::shared_ptr<GPUProfiler> &gpu_profiler_inst, CUpti_CallbackId cb_id,
+                  const CUpti_CallbackData *cb_data) {
+  uint64_t start_timestamp = *cb_data->correlationData;
+  uint64_t end_timestamp = GetCUPTITimeStamp();
+  switch (cb_id) {
+    case CUPTI_DRIVER_TRACE_CBID_cuLaunchKernel:
+    case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernel:
+    case CUPTI_DRIVER_TRACE_CBID_cuLaunchCooperativeKernelMultiDevice:
+      gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuLaunchKernel", start_timestamp, end_timestamp);
       break;
     case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc:
     case CUPTI_DRIVER_TRACE_CBID_cuMemAlloc_v2:
@@ -151,6 +172,9 @@
       gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "others_api", start_timestamp, end_timestamp);
       break;
   }
+  if (IsMemcpyAsyncEvent(cb_id) || IsMemcpySyncEvent(cb_id)) {
+    gpu_profiler_inst->EventHandleProcess(cb_id, cb_data, "cuMemcpy", start_timestamp, end_timestamp);
+  }
 }
 
 void CUPTICallBackFunc(void *user_data, CUpti_CallbackDomain domain, CUpti_CallbackId cb_id,
@@ -369,6 +393,7 @@ void GPUProfiler::Init(const std::string &profileDataPath = "") {
 
   base_time_.gpu_start_time = GetCUPTITimeStamp();
   base_time_.host_start_time = GetHostTimeStamp();
+  base_time_.host_start_monotonic_raw_time = GetHostMonoTimeStamp();
   profile_data_path_ = profileDataPath;
   MS_LOG(INFO) << "GPU start time(ns):" << base_time_.gpu_start_time
@@ -477,7 +502,7 @@ void GPUProfiler::SaveProfileData() {
     dataSaver.SetStepTraceOpName(step_trace_op_name);
     dataSaver.ParseOpInfo(op_info_map_);
     dataSaver.ParseEvent(events_);
-    dataSaver.WriteFile(profile_data_path_);
+    dataSaver.WriteFile(profile_data_path_, base_time_);
     SaveExtraProfileData();
   }
 }
diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h
index 76ebae26eb..3b693e00b7 100644
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.h
@@ -106,6 +106,7 @@ struct OpInfo {
 struct BaseTime {
   // nanosecond
   uint64_t host_start_time = 0l;
+  uint64_t host_start_monotonic_raw_time = 0l;
   uint64_t gpu_start_time = 0l;
 };
 
diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
index 3057e8bcca..c6815a343e 100644
--- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "runtime/device/cpu/cpu_kernel_runtime.h"
+#include <unistd.h>
 #include <memory>
 #include <string>
 #include <vector>
@@ -29,6 +30,7 @@
 #include "backend/session/anf_runtime_algorithm.h"
 #include "backend/session/session_basic.h"
 #include "frontend/operator/ops.h"
+#include "profiler/device/cpu/cpu_profiling.h"
 #include "utils/shape_utils.h"
 #include "utils/profile.h"
 #include "utils/trace_base.h"
@@ -376,6 +378,8 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
   static_cast<CPUMemoryManager *>(mem_manager_.get())->IncreaseAddressRefCount(kernel_graph);
 
   auto kernels = kernel_graph->execution_order();
+  auto profiler_inst = profiler::cpu::CPUProfiler::GetInstance();
+  MS_EXCEPTION_IF_NULL(profiler_inst);
   for (const auto &kernel : kernels) {
 #ifdef ENABLE_PROFILE
     double start_time = GetTime();
@@ -406,11 +410,18 @@ bool CPUKernelRuntime::Run(session::KernelGraph *kernel_graph, bool is_task_sink
       AddRuntimeAddress(device_address, &kernel_workspaces);
     }
     bool ret = true;
+    if (profiler_inst->GetEnableFlag()) {
+      uint32_t pid = getpid();
+      profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), pid);
+    }
     try {
       ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, 0);
     } catch (std::exception &e) {
      MS_LOG(EXCEPTION) << e.what() << "\nTrace:" << trace::DumpSourceLines(kernel);
     }
+    if (profiler_inst->GetEnableFlag()) {
+      profiler_inst->OpDataProducerEnd();
+    }
     if (!ret) {
       MS_LOG(EXCEPTION) << "Launch kernel failed. Trace:" << trace::DumpSourceLines(kernel);
Trace:" << trace::DumpSourceLines(kernel); } diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h index d1581f5e50..2998841162 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.h @@ -26,6 +26,7 @@ #include "backend/session/session_basic.h" #include "backend/session/anf_runtime_algorithm.h" #include "utils/any.h" +#include "profiler/device/cpu/cpu_profiling.h" namespace mindspore { namespace device { namespace cpu { diff --git a/mindspore/profiler/parser/integrator.py b/mindspore/profiler/parser/integrator.py index 92767d2a6b..732d4c3479 100644 --- a/mindspore/profiler/parser/integrator.py +++ b/mindspore/profiler/parser/integrator.py @@ -1,4 +1,4 @@ -# Copyright 2020 Huawei Technologies Co., Ltd +# Copyright 2020-2021 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -666,7 +666,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator): ) file_path = validate_and_normalize_path(file_path) if not os.path.exists(file_path): - logger.error("Failed to find parsed timeline file.") + logger.error(f"Failed to find parsed timeline file {file_path}.") raise ProfilerFileNotFoundException('parsed timeline file') return file_path @@ -712,10 +712,33 @@ class GpuTimelineGenerator(BaseTimelineGenerator): timeline_list = self._load_op_data(op_file_path) + \ self._load_activity_data(activity_file_path, activity_args_file_path) + cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id) + cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data() + if cpu_timeline_list: + self._clock_synchronize_to_gpu(cpu_timeline_list) + timeline_list.extend(cpu_timeline_list) timeline_list.sort(key=lambda x: float(x[2])) return timeline_list + def _clock_synchronize_to_gpu(self, timeline_list): + """Synchronize the timestamp from device to host.""" + start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._device_id}.txt") + + try: + with open(start_time_file_path) as f: + lines = f.readlines() + host_monotonic_start_time = int(lines[0].strip().split(':')[-1]) + gpu_start_time = int(lines[1].strip().split(':')[-1]) + except (IOError, OSError) as err: + logger.error(f'Error occurred when read {start_time_file_path}: {err}') + raise ProfilerIOException + + time_diff = gpu_start_time - host_monotonic_start_time + start_time = 2 + for idx, time_item in enumerate(timeline_list): + timeline_list[idx][start_time] = int(time_item[start_time]) + time_diff + def _load_op_data(self, op_file_path): """Load operator data from file""" op_timeline_list = [] @@ -847,7 +870,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator): timeline_dict['pid'] = op_meta.pid self._timeline_meta.append(timeline_dict) - def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter): + def init_timeline(self, all_reduce_info, framework_info, aicpu_info, min_cycle_counter, source_path): """ Init timeline metadata, adding all collected info. 
@@ -862,6 +885,12 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
         logger.info('Initiating timeline...')
         timeline_list = self._load_timeline_data()
 
+        cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id)
+        cpu_timeline_list = cpu_timeline_generator.get_timeline_data()
+        if cpu_timeline_list:
+            self._clock_synchronize_to_host(timeline_list, source_path)
+            timeline_list.extend(cpu_timeline_list)
+
         timeline_list.sort(key=lambda x: float(x[2]))
         self._timeline_summary['op_exe_times'] = len(timeline_list)
 
         # Add AllReduce info to timeline temp list and sort by start time.
@@ -897,3 +926,72 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
 
         # Update timeline summary info
         self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())
+
+    def _clock_synchronize_to_host(self, timeline_list, source_path):
+        """Synchronize the timestamp from device to host."""
+        host_start_file_path = os.path.join(source_path, f"host_start.log.{self._device_id}")
+        dev_start_file_path = os.path.join(source_path, f"dev_start.log.{self._device_id}")
+
+        try:
+            with open(host_start_file_path) as f:
+                lines = f.readlines()
+                host_monotonic = int(lines[2].strip().split(':')[1])
+        except (IOError, OSError) as err:
+            logger.error('Error occurred when reading host_start.log: %s', err)
+            raise ProfilerIOException
+        try:
+            with open(dev_start_file_path) as f:
+                lines = f.readlines()
+                dev_cntvct = int(lines[2].strip().split(':')[1])
+        except (IOError, OSError) as err:
+            logger.error('Error occurred when reading dev_start.log: %s', err)
+            raise ProfilerIOException
+
+        factor_ns_to_ms = 1e6
+        factor_ms_to_ten_ns = 1e5
+        factor_ten_ns_to_ns = 10
+        start_time = 2
+        for idx, time_item in enumerate(timeline_list):
+            cycle_counter = int(float(time_item[start_time]) * factor_ms_to_ten_ns)
+            host_monotonic_time = host_monotonic + (cycle_counter - dev_cntvct) * factor_ten_ns_to_ns
+            timeline_list[idx][start_time] = host_monotonic_time / factor_ns_to_ms
+
+class CpuTimelineGenerator(GpuTimelineGenerator):
+    """Generate cpu Timeline data from file."""
+    _output_op_execute_time_file_path = "cpu_op_execute_timestamp_{}.txt"
+
+    def _get_and_validate_path(self, file_name):
+        """Generate op or activity file path from file name, and validate this path."""
+        file_path = os.path.join(
+            self._profiling_dir,
+            file_name.format(self._device_id)
+        )
+        file_path = validate_and_normalize_path(file_path)
+
+        return file_path
+
+    def load_cpu_op_data(self):
+        """Load cpu operator data from file"""
+        op_file_path = self._get_and_validate_path(
+            self._output_op_execute_time_file_path)
+        timeline_list = []
+        if not os.path.exists(op_file_path):
+            logger.info("No cpu operator info.")
+            return timeline_list
+        timeline_list = self._load_op_data(op_file_path)
+
+        return timeline_list
+
+    def get_timeline_data(self):
+        """Get timeline data from file."""
+        timeline_list = self.load_cpu_op_data()
+        factor_ns_to_ms = 1e6
+        factor_us_to_ms = 1e3
+        start_time = 2
+        duration = 3
+        for idx, time_item in enumerate(timeline_list):
+            time_item[start_time] = float(time_item[start_time]) / factor_ns_to_ms
+            time_item[duration] = float(time_item[duration]) / factor_us_to_ms
+            timeline_list[idx] = time_item
+
+        return timeline_list
diff --git a/mindspore/profiler/profiling.py b/mindspore/profiler/profiling.py
index 8f65874ea9..4f61de205a 100644
--- a/mindspore/profiler/profiling.py
+++ b/mindspore/profiler/profiling.py
@@ -96,6 +96,11 @@ class Profiler:
 
         os.environ['PROFILING_MODE'] = 'true'
         os.environ['MINDDATA_PROFILING_DIR'] = self._output_path
+        if self._device_target:
+            from mindspore._c_expression import CPUProfiler
+            self._cpu_profiler = CPUProfiler.get_instance()
+            self._cpu_profiler.init(self._output_path)
+            self._cpu_profiler.step_profiling_enable(True)
         if self._device_target and self._device_target == "GPU":
             from mindspore._c_expression import GPUProfiler
             self._gpu_profiler = GPUProfiler.get_instance()
@@ -172,6 +177,7 @@ class Profiler:
             >>> model.train()
             >>> profiler.analyse()
         """
+        self._cpu_profiler.stop()
         if self._device_target and self._device_target == "GPU":
             self._gpu_analyse()
 
@@ -244,7 +250,7 @@ class Profiler:
 
         # analyse timeline info
         try:
-            self._analyse_timeline(aicpu_data_parser, optime_parser)
+            self._analyse_timeline(aicpu_data_parser, optime_parser, source_path)
         except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
             logger.warning('Fail to write timeline data: %s', err)
 
@@ -336,7 +342,7 @@ class Profiler:
 
         return point_info
 
-    def _analyse_timeline(self, aicpu_parser, optime_parser):
+    def _analyse_timeline(self, aicpu_parser, optime_parser, source_path):
         """
         Analyse and parse timeline info.
 
@@ -368,7 +374,8 @@ class Profiler:
         # Add info into timeline, such as AI CPU, AllReduce, framework info.
         aicpu_info = aicpu_parser.query_aicpu_data()
         min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter)
-        timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info, min_cycle_counter)
+        timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info,
+                                        min_cycle_counter, source_path)
         size_limit = 20 * 1024 * 1024  # 20MB
         timeline_analyser.write_timeline(size_limit)
         timeline_analyser.write_timeline_summary()
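
Usage sketch (not part of the patch): with these changes, host-CPU operators are profiled through the same Python entry point that GPU already uses, since Profiler.__init__ now calls CPUProfiler.init()/step_profiling_enable(True) and analyse() calls CPUProfiler.stop(). The snippet below follows the doctest style of the existing Profiler docstrings; `model` and `dataset` are assumed to be defined by the user, and the `output_path` keyword is assumed from the surrounding mindspore.profiler API rather than introduced by this diff.

>>> from mindspore import context
>>> from mindspore.profiler import Profiler
>>> context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
>>> profiler = Profiler(output_path='./profiler_data')   # starts the CPU profiler for host kernels
>>> model.train(1, dataset)                               # CPUKernelRuntime::Run times each launched kernel
>>> profiler.analyse()                                    # stops profiling and writes cpu_op_type_info_{id}.csv,
...                                                       # cpu_op_detail_info_{id}.csv and cpu_op_execute_timestamp_{id}.txt

On GPU targets the same flow additionally writes start_time_{id}.txt, which _clock_synchronize_to_gpu() uses to shift the host monotonic timestamps of CPU ops onto the GPU timeline before merging both into one trace.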