!10063 Modify profiler directory structure to fit the new Run Package

From: @gzhcv
Reviewed-by: 
Signed-off-by:
pull/10063/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 261bb736de

@ -214,11 +214,8 @@ class _Context:
self.set_param(ms_ctx_param.max_call_depth, max_call_depth)
def set_profiling_options(self, option):
    """Validate and apply the profiling option string.

    Args:
        option (str): One of the supported trace option strings.

    Raises:
        TypeError: If `option` is not a str.
        ValueError: If `option` is a str but not a supported option string.
    """
    # Type check must come first: the original order ran the membership
    # test before isinstance, so a non-str value (e.g. an int) raised
    # ValueError instead of the intended TypeError.
    if not isinstance(option, str):
        raise TypeError("The parameter option must be str.")
    options = ["training_trace", "task_trace",
               "task_trace:training_trace", "training_trace:task_trace", "op_trace"]
    if option not in options:
        raise ValueError("Profiling options must be in 'training_trace' 'task_trace' "
                         "'task_trace:training_trace' 'training_trace:task_trace' or 'op_trace'.")
    self.set_param(ms_ctx_param.profiling_options, option)
def set_variable_memory_max_size(self, variable_memory_max_size):

@ -174,7 +174,6 @@ class FrameworkParser:
device_id (str): The device ID.
output_path (str): The directory of the parsed file. Default: `./`.
"""
_raw_data_dir = '/var/log/npu/profiling'
_regex_framework = r'Framework\.(?P<data_type>.+)\.(?P<device_id>\d).+'
_regex_framework_in_data = r'Framework\.(?P<data_type>.+)\.' \
r'(?P<device_id>\d)\.(?P<profiling_id>[a-zA-Z0-9]+).+'
@ -193,6 +192,7 @@ class FrameworkParser:
_task_id_threshold = 25000
def __init__(self, profiling_id, device_id, output_path='./'):
    """Initialize the framework parser for one profiling job.

    Args:
        profiling_id (str): The profiling job ID used to locate the raw data.
        device_id (str): The device ID.
        output_path (str): Directory holding the raw profiling data. Default: './'.
    """
    # The raw data directory is now the user-supplied output path rather
    # than a fixed system directory (previously '/var/log/npu/profiling').
    self._raw_data_dir = output_path
    # Job-specific raw profiling path, derived by a helper defined
    # elsewhere in this class.
    self._profiling_path = self._get_raw_profiling_path(profiling_id)
    self._backend_type = None
    # Discovered framework file paths, grouped by data type.
    self._framework_path = {'graph': [], 'task': [], 'point': []}

@ -16,6 +16,7 @@
import os
import stat
import time
import json
from enum import Enum
from mindspore import log as logger, context
@ -37,7 +38,6 @@ from mindspore.profiler.parser.optime_parser import OPComputeTimeParser
from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, AscendStepTraceParser
from mindspore.nn.cell import Cell
PROFILING_LOG_BASE_PATH = "/var/log/npu/profiling"
INIT_OP_NAME = 'Default/InitDataSetQueue'
class ProfileOption(Enum):
@ -72,7 +72,6 @@ class Profiler:
>>> profiler.analyse()
"""
_base_profiling_container_path = "/var/log/npu/profiling/container"
_hwts_output_filename_target = "output_format_data_hwts_"
_opcompute_output_filename_target = "output_op_compute_time_"
_aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
@ -80,9 +79,11 @@ class Profiler:
def __init__(self, **kwargs):
# get device_id and device_target
self._get_devid_and_devtarget()
output_path = kwargs.pop("output_path", "./data")
format_time = int(time.time())
output_path = kwargs.pop("output_path", f"data-{format_time}")
self._output_path = validate_and_normalize_path(output_path)
self._output_path = os.path.join(self._output_path, "profiler")
self._output_path = os.path.join(self._output_path, f"profiler-{format_time}")
self._base_profiling_container_path = os.path.join(self._output_path, "container")
if not os.path.exists(self._output_path):
os.makedirs(self._output_path, exist_ok=True)
os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
@ -113,10 +114,25 @@ class Profiler:
logger.warning("There are invalid params which don't work.")
os.environ['DEVICE_ID'] = self._dev_id
os.environ['AICPU_PROFILING_MODE'] = 'true'
fp_point = os.environ.get("PROFILING_FP_START", "")
bp_point = os.environ.get("PROFILING_BP_END", "")
profiling_options = {
"result_path": self._output_path,
"fp_point": fp_point,
"bp_point": bp_point,
"training_trace": "on",
"task_trace": "on",
"ai_core_metrics": "PipeUtilization",
"aicpu_trace": "on"
}
profiling_options = json.dumps(profiling_options)
# Option strings longer than 2048 characters are ignored by the backend, which leads to profiling option parsing errors
if len(profiling_options) > 2048:
raise ValueError("The parameter length exceeds the limit (2048)")
# use the context interface to enable profiling, for the new mindspore version (after 2020.5.21)
context.set_context(enable_profiling=True, profiling_options="training_trace:task_trace")
context.set_context(enable_profiling=True, profiling_options=profiling_options)
self._container_path = os.path.join(self._base_profiling_container_path, self._dev_id)
data_path = os.path.join(self._container_path, "data")
@ -174,7 +190,7 @@ class Profiler:
job_id = self._get_profiling_job_id()
logger.info("Profiling: job id is %s ", job_id)
source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
source_path = os.path.join(self._output_path, job_id)
# parse hwts.log.data.45.dev file, and get task profiling data
hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
@ -353,12 +369,12 @@ class Profiler:
return self._profiling_job_id
job_id = ""
cmd = "ls -t " + PROFILING_LOG_BASE_PATH + "|grep JOB|awk '{print $1}'"
cmd = "ls -t " + self._output_path + "|grep JOB|awk '{print $1}'"
r = os.popen(cmd)
profiling_job_dirs = r.readlines()
r.close()
for item in profiling_job_dirs:
path = os.path.join(PROFILING_LOG_BASE_PATH, item.strip())
path = os.path.join(self._output_path, item.strip())
log_file = get_file_names(path, "host_start.log")
if not log_file:
logger.error("Profiling: job path %s, host_start.log not exist.", path)

@ -128,7 +128,6 @@ def cleanup():
class TestProfiler:
device_id = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
mnist_path = '/home/workspace/mindspore_dataset/mnist'
profiler_path = os.path.join(os.getcwd(), 'data/profiler/')
@classmethod
def teardown_class(cls):
@ -140,7 +139,9 @@ class TestProfiler:
@pytest.mark.env_onecard
def test_gpu_profiler(self):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
profiler = Profiler()
profiler = Profiler(output_path='data')
profiler_name = os.listdir(os.path.join(os.getcwd(), 'data'))[0]
self.profiler_path = os.path.join(os.getcwd(), f'data/{profiler_name}/')
ds_train = create_dataset(os.path.join(self.mnist_path, "train"))
if ds_train.get_dataset_size() == 0:
raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")

@ -49,13 +49,15 @@ class TestFrameworkParser:
"""Test the class of `FrameworkParser`."""
def setup_method(self):
    """Initialization before test case execution.

    Creates a fresh copy of the raw profiling data for each test job and
    builds a FrameworkParser over each copy, so tests cannot interfere
    with each other or with the shared fixture directory.
    """
    # NOTE(review): the earlier mock.patch/mkdtemp-based setup was
    # superseded by per-job copies of RAW_DATA_BASE; the duplicate
    # assignments (which also leaked temp directories) are removed.
    def _copied_raw_data():
        # NamedTemporaryFile is garbage-collected immediately, deleting the
        # file and yielding a unique path that does not yet exist —
        # copytree then creates it.
        path = tempfile.NamedTemporaryFile(prefix='test_framework_parser_').name
        shutil.copytree(RAW_DATA_BASE, path)
        return path

    self._output_path_1 = _copied_raw_data()
    self._parser_1 = FrameworkParser('JOB1', '0', self._output_path_1)
    self._output_path_2 = _copied_raw_data()
    self._parser_2 = FrameworkParser('JOB2', '0', self._output_path_2)
    self._output_path_4 = _copied_raw_data()
    self._parser_4 = FrameworkParser('JOB4', '0', self._output_path_4)
def teardown_method(self) -> None:
"""Clear up after test case execution."""

@ -15,6 +15,7 @@
""" test_context """
import os
import shutil
import json
import pytest
from mindspore import context
@ -94,14 +95,18 @@ def test_profiling_options():
context.set_context(profiling_options=True)
with pytest.raises(TypeError):
context.set_context(profiling_options=1)
with pytest.raises(ValueError):
context.set_context(profiling_options="training_")
with pytest.raises(ValueError):
context.set_context(profiling_options="training_trace:op_trace")
context.set_context(profiling_options="training_trace")
assert context.get_context("profiling_options") == "training_trace"
context.set_context(profiling_options="training_trace:task_trace")
assert context.get_context("profiling_options") == "training_trace:task_trace"
profiling_options = {
"result_path": "",
"fp_point": "",
"bp_point": "",
"training_trace": "on",
"task_trace": "on",
"ai_core_metrics": "PipeUtilization",
"aicpu_trace": "on"
}
profiling_options = json.dumps(profiling_options)
context.set_context(profiling_options=profiling_options)
assert context.get_context("profiling_options") == profiling_options
def test_variable_memory_max_size():

Loading…
Cancel
Save