You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mindspore/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc

180 lines
6.3 KiB

/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/perf/profiling.h"
#include <sys/stat.h>
#include <sys/time.h>
#include <cstdlib>
#include <fstream>
#include "utils/ms_utils.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/engine/perf/monitor.h"
#include "minddata/dataset/engine/perf/device_queue_tracing.h"
#include "minddata/dataset/engine/perf/connector_size.h"
#include "minddata/dataset/engine/perf/connector_throughput.h"
#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h"
#include "minddata/dataset/util/log_adapter.h"
namespace mindspore {
namespace dataset {
// Constructor
ProfilingManager::ProfilingManager(ExecutionTree *tree) : tree_(tree), enabled_(true) {
perf_monitor_ = std::make_unique<Monitor>(tree_);
}
bool ProfilingManager::IsProfilingEnable() const { return common::GetEnv("PROFILING_MODE") == "true" && enabled_; }
Status ProfilingManager::Initialize() {
// Register nodes based on config
std::string dir = common::GetEnv("MINDDATA_PROFILING_DIR");
if (dir.empty()) {
RETURN_STATUS_UNEXPECTED("Profiling dir is not set.");
}
char real_path[PATH_MAX] = {0};
if (dir.size() >= PATH_MAX) {
RETURN_STATUS_UNEXPECTED("Profiling dir is invalid.");
}
#if defined(_WIN32) || defined(_WIN64)
if (_fullpath(real_path, common::SafeCStr(dir), PATH_MAX) == nullptr) {
RETURN_STATUS_UNEXPECTED("Profiling dir is invalid.");
}
#else
if (realpath(common::SafeCStr(dir), real_path) == nullptr) {
RETURN_STATUS_UNEXPECTED("Profiling dir is invalid.");
}
#endif
dir_path_ = real_path;
// If DEVICE_ID is not set, default value is 0
device_id_ = common::GetEnv("DEVICE_ID");
if (device_id_.empty()) {
device_id_ = "0";
}
// Register all profiling node.
// device_queue node is used for graph mode
std::shared_ptr<Tracing> device_queue_tracing = std::make_shared<DeviceQueueTracing>();
RETURN_IF_NOT_OK(RegisterTracingNode(device_queue_tracing));
// dataset_iterator node is used for graph mode
std::shared_ptr<Tracing> dataset_iterator_tracing = std::make_shared<DatasetIteratorTracing>();
RETURN_IF_NOT_OK(RegisterTracingNode(dataset_iterator_tracing));
std::shared_ptr<Sampling> connector_size_sampling = std::make_shared<ConnectorSize>(tree_);
RETURN_IF_NOT_OK(RegisterSamplingNode(connector_size_sampling));
std::shared_ptr<Sampling> connector_thr_sampling = std::make_shared<ConnectorThroughput>(tree_);
RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling));
return Status::OK();
}
// Launch monitoring thread.
Status ProfilingManager::LaunchMonitor() {
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Monitor Thread launched", std::ref(*perf_monitor_)));
return Status::OK();
}
// Profiling node registration
Status ProfilingManager::RegisterTracingNode(std::shared_ptr<Tracing> node) {
// Check if node with the same name has already been registered.
auto exist = tracing_nodes_.find(node->Name());
if (exist != tracing_nodes_.end()) {
return Status(StatusCode::kProfilingError, "Profiling node already exist: " + node->Name());
}
// Register the node with its name as key.
RETURN_IF_NOT_OK(node->Init(dir_path_, device_id_));
tracing_nodes_[node->Name()] = node;
return Status::OK();
}
// Profiling node getter
Status ProfilingManager::GetTracingNode(const std::string &name, std::shared_ptr<Tracing> *node) {
// Check if node with the same name has already been registered.
auto exist = tracing_nodes_.find(name);
if (exist == tracing_nodes_.end()) {
return Status(StatusCode::kProfilingError, "Profiling node does not exist: " + name);
}
// Fetch node.
*node = tracing_nodes_[name];
return Status::OK();
}
// Profiling node registration
Status ProfilingManager::RegisterSamplingNode(std::shared_ptr<Sampling> node) {
// Check if node with the same name has already been registered.
auto exist = sampling_nodes_.find(node->Name());
if (exist != sampling_nodes_.end()) {
return Status(StatusCode::kProfilingError, "Profiling node already exist: " + node->Name());
}
// Register the node with its name as key.
RETURN_IF_NOT_OK(node->Init(dir_path_, device_id_));
sampling_nodes_[node->Name()] = node;
return Status::OK();
}
// Profiling node getter
Status ProfilingManager::GetSamplingNode(const std::string &name, std::shared_ptr<Sampling> *node) {
// Check if node with the same name has already been registered.
auto exist = sampling_nodes_.find(name);
if (exist == sampling_nodes_.end()) {
return Status(StatusCode::kProfilingError, "Profiling node does not exist: " + name);
}
// Fetch node.
*node = sampling_nodes_[name];
return Status::OK();
}
Status ProfilingManager::SaveProfilingData() {
if (!IsProfilingEnable()) {
return Status::OK();
}
MS_LOG(INFO) << "Start to save profiling data.";
for (auto node : tracing_nodes_) {
RETURN_IF_NOT_OK(node.second->SaveToFile());
}
for (auto node : sampling_nodes_) {
RETURN_IF_NOT_OK(node.second->SaveToFile());
}
MS_LOG(INFO) << "Save profiling data end.";
return Status::OK();
}
Status ProfilingManager::ChangeFileMode() {
if (!IsProfilingEnable()) {
return Status::OK();
}
MS_LOG(INFO) << "Start to change file mode.";
for (auto node : tracing_nodes_) {
RETURN_IF_NOT_OK(node.second->ChangeFileMode());
}
for (auto node : sampling_nodes_) {
RETURN_IF_NOT_OK(node.second->ChangeFileMode());
}
MS_LOG(INFO) << "Change file mode end.";
return Status::OK();
}
int64_t ProfilingTime::GetCurMilliSecond() {
// because cpplint does not allow using namespace
using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::chrono::steady_clock;
return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}
} // namespace dataset
} // namespace mindspore