!14268 dataset: add stop_dataset_profiler option

From: @ms_yan
Reviewed-by: @heleiwang,@liucunwei
Signed-off-by: @liucunwei
pull/14268/MERGE
Committed by mindspore-ci-bot via Gitee
commit c7b6b1d24e
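
Taken together, the hunks below wire a stop/flush handshake between the Python Profiler and the dataset Monitor thread: new ConfigManager bindings (stop_dataset_profiler, get_profiler_file_status) backed by atomic flags, a Monitor sampling loop that honours the stop flag and reports once the profiling files are written, a _stop_dataset_profiler() helper in the dataset config module that Profiler.analyse() now calls, and a downgrade of the "open /proc/stat failed" logs from WARNING to INFO. A minimal sketch of the intended user-facing flow, assuming a supported device target is already configured (Profiler constructor arguments omitted):

from mindspore.profiler import Profiler

profiler = Profiler()   # profiling starts; the dataset Monitor thread samples in the background
# ... build the dataset pipeline and run training ...
profiler.analyse()      # with this change, analyse() also calls _stop_dataset_profiler(),
                        # blocking until the dataset profiling files have been generated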

@@ -50,6 +50,8 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
.def("set_auto_worker_config", &ConfigManager::set_auto_worker_config_)
.def("set_callback_timeout", &ConfigManager::set_callback_timeout)
.def("set_monitor_sampling_interval", &ConfigManager::set_monitor_sampling_interval)
.def("stop_dataset_profiler", &ConfigManager::stop_dataset_profiler)
.def("get_profiler_file_status", &ConfigManager::get_profiler_file_status)
.def("set_num_parallel_workers", &ConfigManager::set_num_parallel_workers)
.def("set_op_connector_size", &ConfigManager::set_op_connector_size)
.def("set_rows_per_buffer", &ConfigManager::set_rows_per_buffer)

@@ -39,6 +39,8 @@ ConfigManager::ConfigManager()
seed_(kCfgDefaultSeed),
numa_enable_(false),
monitor_sampling_interval_(kCfgMonitorSamplingInterval),
stop_profiler_(false),
file_ready_(true),
callback_timout_(kCfgCallbackTimeout),
cache_host_(kCfgDefaultCacheHost),
cache_port_(kCfgDefaultCachePort),
@@ -139,6 +141,10 @@ void ConfigManager::set_seed(uint32_t seed) { seed_ = seed; }
void ConfigManager::set_monitor_sampling_interval(uint32_t interval) { monitor_sampling_interval_ = interval; }
void ConfigManager::stop_dataset_profiler(bool stop_profiler) { stop_profiler_ = stop_profiler; }
void ConfigManager::set_profiler_file_status(bool file_ready) { file_ready_ = file_ready; }
void ConfigManager::set_callback_timeout(uint32_t timeout) { callback_timout_ = timeout; }
void ConfigManager::set_cache_host(std::string cache_host) { cache_host_ = std::move(cache_host); }

@@ -16,6 +16,7 @@
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONFIG_MANAGER_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONFIG_MANAGER_H_
#include <atomic>
#include <ostream>
#include <sstream>
#include <string>
@@ -178,6 +179,22 @@ class ConfigManager {
// @return The interval of monitor sampling
int32_t monitor_sampling_interval() const { return monitor_sampling_interval_; }
// setter function
// @param stop_profiler - The setting to apply to the config
void stop_dataset_profiler(bool stop_profiler);
// getter function
// @return The status of stop profiler
bool stop_profiler_status() const { return stop_profiler_; }
// setter function
// @param file_ready - The setting to apply to the config
void set_profiler_file_status(bool file_ready);
// getter function
// @return The status of profiler file, whether generated
bool get_profiler_file_status() const { return file_ready_; }
// setter function
// @param auto_num_workers - whether assign threads to each op automatically
void set_auto_num_workers(bool auto_num_workers) { auto_num_workers_ = auto_num_workers; }
@@ -223,6 +240,8 @@ class ConfigManager {
int32_t rank_id_;
uint32_t seed_;
uint32_t monitor_sampling_interval_;
std::atomic_bool stop_profiler_;
std::atomic_bool file_ready_;
uint32_t callback_timout_;
std::string cache_host_;
int32_t cache_port_;

@@ -113,7 +113,7 @@ Status DeviceCpu::ParseRunningProcess(const std::string &str) {
Status DeviceCpu::Collect(ExecutionTree *tree) {
std::ifstream file("/proc/stat");
if (!file.is_open()) {
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
MS_LOG(INFO) << "Open CPU file failed when collect CPU information";
return Status::OK();
}
bool first_line = true;
@@ -214,7 +214,7 @@ Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id,
std::ifstream file(stat_path);
if (!file.is_open()) {
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
MS_LOG(INFO) << "Open CPU file failed when collect CPU information";
return Status::OK();
}
std::string str;
@@ -236,7 +236,7 @@ Status OperatorCpu::GetTotalCpuTime(uint64_t *total_stat) {
Status OperatorCpu::GetTotalCpuTime(uint64_t *total_stat) {
std::ifstream file("/proc/stat");
if (!file.is_open()) {
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
MS_LOG(INFO) << "Open CPU file failed when collect CPU information";
return Status::OK();
}
std::string str;
@@ -443,7 +443,7 @@ Status ProcessCpu::ParseCpuInfo() {
std::ifstream file(stat_path);
if (!file.is_open()) {
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
MS_LOG(INFO) << "Open CPU file failed when collect CPU information";
continue;
}
std::string str;
@@ -479,7 +479,7 @@ Status ProcessCpu::GetTotalCpuTime(uint64_t *total_stat) {
Status ProcessCpu::GetTotalCpuTime(uint64_t *total_stat) {
std::ifstream file("/proc/stat");
if (!file.is_open()) {
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
MS_LOG(INFO) << "Open CPU file failed when collect CPU information";
return Status::OK();
}
std::string str;

@@ -29,11 +29,13 @@ Monitor::Monitor(ExecutionTree *tree) : tree_(tree) {
Status Monitor::operator()() {
// Register this thread with TaskManager to receive proper interrupt signal.
TaskManager::FindMe()->Post();
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
cfg->set_profiler_file_status(false);
// Keep sampling if
// 1) Monitor Task is not interrupted by TaskManager AND
// 2) Iterator has not received EOF AND
// 3) Profiling has not been stopped through the ConfigManager
while (!this_thread::is_interrupted() && !(tree_->isFinished())) {
while (!this_thread::is_interrupted() && !(tree_->isFinished()) && !(cfg->stop_profiler_status())) {
if (tree_->IsEpochEnd()) {
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData());
tree_->SetExecuting();
@@ -48,6 +50,8 @@ Status Monitor::operator()() {
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->Analyze());
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData());
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->ChangeFileMode());
cfg->set_profiler_file_status(true);
return Status::OK();
}
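
In the Monitor changes above, the monitor thread clears the file-ready flag on startup (set_profiler_file_status(false)), exits its sampling loop as soon as stop_profiler_status() returns true, and only after Analyze(), SaveProfilingData() and ChangeFileMode() have completed does it set the flag back to true. Since both flags are std::atomic_bool (see the config_manager.h hunk), the Python side can poll them from another thread without additional locking.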

@@ -18,8 +18,10 @@ configuration parameters, and read a configuration file.
"""
import os
import random
import time
import numpy
import mindspore._c_dataengine as cde
from mindspore import log as logger
__all__ = ['set_seed', 'get_seed', 'set_prefetch_size', 'get_prefetch_size', 'set_num_parallel_workers',
'get_num_parallel_workers', 'set_monitor_sampling_interval', 'get_monitor_sampling_interval', 'load',
@@ -357,3 +359,17 @@ def load(file):
        >>> ds.config.load(config_file)
    """
    _config.load(file)


def _stop_dataset_profiler():
    """
    Mainly for stopping the dataset profiler.

    Blocks until the dataset profiler has stopped and the profiling file has been generated.
    """
    while not _config.get_profiler_file_status():
        _config.stop_dataset_profiler(True)
        logger.warning("Profiling: waiting for dataset part profiling stop.")
        time.sleep(1)
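
The helper above is a simple polling loop: it keeps asserting the stop flag, logs a warning, and sleeps one second per round until get_profiler_file_status() reports that the monitor thread has finished writing the files, so callers block until the dataset profiling output is complete.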

@@ -23,6 +23,7 @@ from enum import Enum
from mindspore import log as logger, context
from mindspore.communication.management import GlobalComm, release, get_rank
import mindspore._c_expression as c_expression
from mindspore.dataset.core.config import _stop_dataset_profiler
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
ProfilerIOException, ProfilerException, ProfilerRawFileException
from mindspore.profiler.common.util import get_file_names, fwrite_format
@@ -189,6 +190,7 @@ class Profiler:
        Collect and analyse performance data, called after training or during training. The example is shown above.
        """
        self._cpu_profiler.stop()
        _stop_dataset_profiler()
        if self._device_target and self._device_target == "GPU":
            self._gpu_analyse()
