Add profiler module

pull/3075/head
yuximiao 5 years ago
parent 180b3029e5
commit ed9cc50551

@ -213,6 +213,7 @@ install(
${CMAKE_SOURCE_DIR}/mindspore/common
${CMAKE_SOURCE_DIR}/mindspore/ops
${CMAKE_SOURCE_DIR}/mindspore/communication
${CMAKE_SOURCE_DIR}/mindspore/profiler
DESTINATION ${INSTALL_PY_DIR}
COMPONENT mindspore
)

@ -0,0 +1,27 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Profiler Module Introduction.
This module provides Python APIs to enable the profiling of MindSpore neural networks.
Users can import the mindspore.profiler.Profiler, initialize the Profiler object to start profiling,
and use Profiler.analyse() to stop profiling and analyse the results.
To visualize the profiling results, users can open mindspore Web, find the corresponding run
and click the profile link.
Currently, the Profiler supports AICore operator analysis.
"""
from mindspore.profiler.profiling import Profiler
__all__ = ["Profiler"]

@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

@ -0,0 +1,85 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Profiler error code and messages."""
from enum import unique, Enum
# Category masks for profiler error codes: the category id lives in the bits
# above the low 7 bits, leaving the low 7 bits to index the concrete error
# within its category.
_GENERAL_MASK = 0b00001 << 7
_PARSER_MASK = 0b00010 << 7
_ANALYSER_MASK = 0b00011 << 7


class ProfilerMgrErrors(Enum):
    """Enum definition for profiler errors"""
@unique
class ProfilerErrors(ProfilerMgrErrors):
    """Profiler error codes."""
    # General error codes: the low 7 bits index the error within the
    # general category selected by _GENERAL_MASK.
    PARAM_VALUE_ERROR = 0 | _GENERAL_MASK
    PATH_ERROR = 1 | _GENERAL_MASK
    PARAM_TYPE_ERROR = 2 | _GENERAL_MASK
    DIR_NOT_FOUND_ERROR = 3 | _GENERAL_MASK
    FILE_NOT_FOUND_ERROR = 4 | _GENERAL_MASK
    IO_ERROR = 5 | _GENERAL_MASK

    # Parser error codes.
    DEVICE_ID_MISMATCH_ERROR = 0 | _PARSER_MASK
    RAW_FILE_ERROR = 1 | _PARSER_MASK
    STEP_NUM_NOT_SUPPORTED_ERROR = 2 | _PARSER_MASK
    JOB_ID_MISMATCH_ERROR = 3 | _PARSER_MASK

    # Analyser error codes.
    COLUMN_NOT_EXIST_ERROR = 0 | _ANALYSER_MASK
    ANALYSER_NOT_EXIST_ERROR = 1 | _ANALYSER_MASK
    DEVICE_ID_ERROR = 2 | _ANALYSER_MASK
    OP_TYPE_ERROR = 3 | _ANALYSER_MASK
    GROUP_CONDITION_ERROR = 4 | _ANALYSER_MASK
    SORT_CONDITION_ERROR = 5 | _ANALYSER_MASK
    FILTER_CONDITION_ERROR = 6 | _ANALYSER_MASK
    COLUMN_NOT_SUPPORT_SORT_ERROR = 7 | _ANALYSER_MASK
    PIPELINE_OP_NOT_EXIST_ERROR = 8 | _ANALYSER_MASK
@unique
class ProfilerErrorMsg(Enum):
    """Profiler error messages, keyed by the same member names as ProfilerErrors."""
    # General error messages; "{}" placeholders are filled in at raise time.
    PARAM_VALUE_ERROR = 'Param value error. {}'
    PATH_ERROR = 'Path error. {}'
    PARAM_TYPE_ERROR = 'Param type error. {}'
    DIR_NOT_FOUND_ERROR = 'The dir <{}> not found.'
    FILE_NOT_FOUND_ERROR = 'The file <{}> not found.'
    IO_ERROR = 'Read or write file fail.'

    # Parser error messages.
    DEVICE_ID_MISMATCH_ERROR = 'The device ID mismatch.'
    RAW_FILE_ERROR = 'Raw file error. {}'
    STEP_NUM_NOT_SUPPORTED_ERROR = 'The step num must be in {}'
    JOB_ID_MISMATCH_ERROR = 'The job id in the parameter is not the same as ' \
                            'in the training trace file. '

    # Analyser error messages.
    COLUMN_NOT_EXIST_ERROR = 'The column {} does not exist.'
    ANALYSER_NOT_EXIST_ERROR = 'The analyser {} does not exist.'
    # NOTE(review): "DEIVICE" looks like a typo for "DEVICE" — the code enum
    # ProfilerErrors spells it DEVICE_ID_ERROR, so a lookup by shared member
    # name would miss this entry. Renaming would break any caller that already
    # references DEIVICE_ID_ERROR by name; confirm with callers before fixing.
    DEIVICE_ID_ERROR = 'The device_id in search_condition error, {}'
    FILTER_CONDITION_ERROR = 'The filter_condition in search_condition error, {}'
    OP_TYPE_ERROR = 'The op_type in search_condition error, {}'
    GROUP_CONDITION_ERROR = 'The group_condition in search_condition error, {}'
    SORT_CONDITION_ERROR = 'The sort_condition in search_condition error, {}'
    COLUMN_NOT_SUPPORT_SORT_ERROR = 'The column {} does not support to sort.'
    PIPELINE_OP_NOT_EXIST_ERROR = 'The minddata pipeline operator {} does not exist.'

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

@ -0,0 +1,26 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Profiler check parameters."""
def check_bool(input_param, param_name):
    """Validate that the given parameter is a bool and return it unchanged.

    Args:
        input_param: The value to validate.
        param_name (str): Parameter name used in the error message.

    Returns:
        bool, the validated value.

    Raises:
        TypeError: If the value is not a bool.
    """
    if not isinstance(input_param, bool):
        raise TypeError("Parameter {}: input type must be bool!".format(param_name))
    return input_param
def check_subgraph(subgraph):
    """Validate a subgraph name and return it unchanged.

    Args:
        subgraph (str): One of "all", "Default" or "Gradients".

    Returns:
        str, the validated subgraph name.

    Raises:
        ValueError: If the name is not one of the accepted values.
    """
    accepted = ("all", "Default", "Gradients")
    if subgraph not in accepted:
        raise ValueError("subgraph must be all or Default or Gradients, but got {}.".format(subgraph))
    return subgraph

File diff suppressed because it is too large Load Diff

@ -0,0 +1,60 @@
# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Validate the input path."""
import os
def validate_and_normalize_path(
        path,
        check_absolute_path=False,
        allow_parent_dir=False,
):
    """
    Validate the path and return its normalized form.

    The path is treated as a unix local path.

    Note:
        File scheme (rfc8089) is currently not supported.

    Args:
        path (str): Path to be normalized.
        check_absolute_path (bool): Whether to require the path to be absolute
            (starting with '/').
        allow_parent_dir (bool): Whether to allow ".." components in the path.

    Returns:
        str, normalized path.

    Raises:
        RuntimeError: If the path is empty, contains a ".." component while
            not allowed, is relative while an absolute path is required, or
            cannot be resolved by the OS.
    """
    if not path:
        raise RuntimeError("The path is invalid!")

    path_str = str(path)

    # Reject directory-traversal components unless explicitly allowed.
    if not allow_parent_dir:
        path_components = path_str.split("/")
        if ".." in path_components:
            raise RuntimeError("The path is invalid!")

    # Optionally require an absolute unix path.
    if check_absolute_path and not path_str.startswith("/"):
        raise RuntimeError("The path is invalid!")

    try:
        normalized_path = os.path.realpath(path)
    except ValueError as err:
        # Chain the original exception so the root cause is preserved.
        raise RuntimeError("The path is invalid!") from err

    return normalized_path

@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

@ -0,0 +1,175 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
The parser for AI CPU preprocess data.
"""
import os
from mindspore.profiler.common.util import fwrite_format, get_file_join_name
from mindspore import log as logger
class DataPreProcessParser:
    """
    The Parser for AI CPU preprocess data.

    Args:
        input_path(str): The profiling job path.
        output_filename(str): The output data path and name.
    """

    _source_file_target = 'DATA_PREPROCESS.dev.AICPU.'
    _dst_file_title = 'title:DATA_PREPROCESS AICPU'
    _dst_file_column_title = ['serial_number', 'node_type_name', 'total_time(ms)',
                              'dispatch_time(ms)', 'run_start', 'run_end']
    # Divisor converting microseconds to milliseconds.
    _ms_unit = 1000

    def __init__(self, input_path, output_filename):
        self._input_path = input_path
        self._output_filename = output_filename
        self._source_file_name = self._get_source_file()
        # Expected field counts that identify the two supported record layouts.
        self._ms_kernel_flag = 3
        self._other_kernel_flag = 6
        self._thread_flag = 7
        # Index of the "run end" field in each record layout.
        self._ms_kernel_run_end_index = 2
        self._other_kernel_run_end_index = 5
        self._result_list = []
        self._min_cycle_counter = float('inf')

    def _get_source_file(self):
        """Get log file name, which was created by ada service."""
        file_name = get_file_join_name(self._input_path, self._source_file_target)
        if not file_name:
            # Fall back to the "data" sub-directory of the profiling job path.
            data_path = os.path.join(self._input_path, "data")
            file_name = get_file_join_name(data_path, self._source_file_target)
        return file_name

    def _get_kernel_result(self, number, node_list, thread_list):
        """Get the profiling data from different aicpu kernel record layouts.

        Returns a row [number, node_type_name, total_time, dispatch_time,
        run_start, run_end], or None when the record layout is not recognized.
        """
        try:
            if len(node_list) == self._ms_kernel_flag and len(thread_list) == self._thread_flag:
                node_type_name = node_list[0].split(':')[-1]
                run_end_index = self._ms_kernel_run_end_index
            elif len(node_list) == self._other_kernel_flag and len(thread_list) == self._thread_flag:
                # Derive the op type from a scope name such as "a/b/TypeName-id".
                node_type_name = node_list[0].split(':')[-1].split('/')[-1].split('-')[0]
                run_end_index = self._other_kernel_run_end_index
            else:
                logger.warning("the data format can't support 'node_list':%s", str(node_list))
                return None

            run_start = node_list[1].split(':')[-1].split(' ')[0]
            run_end = node_list[run_end_index].split(':')[-1].split(' ')[0]
            # The last two thread fields carry total/dispatch time; convert to ms.
            total_time = float(thread_list[-1].split('=')[-1].split()[0]) / self._ms_unit
            dispatch_time = float(thread_list[-2].split('=')[-1].split()[0]) / self._ms_unit

            return [number, node_type_name, total_time, dispatch_time,
                    run_start, run_end]
        except IndexError as e:
            logger.error(e)
            return None

    def execute(self):
        """Execute the parser, get result data, and write it to the output file."""
        if not os.path.exists(self._source_file_name):
            logger.info("Did not find the aicpu profiling source file")
            return

        with open(self._source_file_name, 'rb') as ai_cpu_data:
            # Records are NUL-separated in the raw file; normalize both
            # "\n\x00" and "\x00" separators to a " ___ " marker, then split.
            ai_cpu_str = str(ai_cpu_data.read().replace(b'\n\x00', b' ___ ')
                             .replace(b'\x00', b' ___ '))[2:-1]
            ai_cpu_lines = ai_cpu_str.split(" ___ ")

        result_list = list()
        ai_cpu_total_time_summary = 0
        # Node serial number.
        serial_number = 1
        # Each Node line is expected to be followed by its Thread line.
        for i in range(len(ai_cpu_lines) - 1):
            node_line = ai_cpu_lines[i]
            thread_line = ai_cpu_lines[i + 1]
            if "Node" in node_line and "Thread" in thread_line:
                # Get the node data from node_line
                node_list = node_line.split(',')
                thread_list = thread_line.split(',')
                result = self._get_kernel_result(serial_number, node_list, thread_list)

                if result is None:
                    continue

                result_list.append(result)
                # Calculate the total time.
                total_time = result[2]
                ai_cpu_total_time_summary += total_time
                # Increase node serial number.
                serial_number += 1
            elif "Node" in node_line and "Thread" not in thread_line:
                node_type_name = node_line.split(',')[0].split(':')[-1]
                logger.warning("The node type:%s cannot find thread data", node_type_name)

        if result_list:
            ai_cpu_total_time = format(ai_cpu_total_time_summary, '.6f')
            result_list.append(["AI CPU Total Time(ms):", ai_cpu_total_time])
            fwrite_format(self._output_filename, " ".join(self._dst_file_column_title), is_start=True, is_print=True)
            fwrite_format(self._output_filename, result_list, is_print=True)

        # For timeline display.
        self._result_list = result_list

    def query_aicpu_data(self):
        """
        Get execution time of AI CPU operator.

        Returns:
            a dict, the metadata of AI CPU operator execution time.
        """
        stream_id = 0  # Default stream id for AI CPU.
        pid = 9000  # Default pid for AI CPU.
        factor = 1000  # Convert time unit from 1us to 1ms
        total_time = 0
        min_cycle_counter = float('inf')
        aicpu_info = []
        op_count_list = []
        for aicpu_item in self._result_list:
            # The summary row appended by execute() carries the total time.
            if "AI CPU Total Time(ms):" in aicpu_item:
                total_time = aicpu_item[-1]
                continue

            op_name = aicpu_item[1]
            start_time = float(aicpu_item[4]) / factor
            min_cycle_counter = min(min_cycle_counter, start_time)
            end_time = float(aicpu_item[5]) / factor
            duration = end_time - start_time
            aicpu_info.append([op_name, stream_id, start_time, duration, pid])

            # Record the number of operator types.
            if op_name not in op_count_list:
                op_count_list.append(op_name)

        self._min_cycle_counter = min_cycle_counter
        aicpu_dict = {
            'info': aicpu_info,
            'total_time': float(total_time),
            'op_exe_times': len(aicpu_info),
            'num_of_ops': len(op_count_list),
            'num_of_streams': 1
        }

        return aicpu_dict

    @property
    def min_cycle_counter(self):
        """Get minimum cycle counter in AI CPU."""
        return self._min_cycle_counter

@ -0,0 +1,113 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The container of metadata used in profiler parser."""
class HWTSContainer:
    """
    HWTS output container.

    Args:
        split_list (list): The split list of metadata in HWTS output file.
    """

    def __init__(self, split_list):
        # Fields read from fixed positions of the split HWTS log line.
        self._status = split_list[0]
        self._task_id = split_list[6]
        self._cycle_counter = float(split_list[7])
        self._stream_id = split_list[8]
        # Filled in later once the task is matched to an operator.
        self._op_name = ''
        self._duration = None

    @property
    def status(self):
        """str: the status of the operator, i.e. Start or End."""
        return self._status

    @property
    def task_id(self):
        """str: the task id of the operator."""
        return self._task_id

    @property
    def cycle_counter(self):
        """float: the cycle counter value."""
        return self._cycle_counter

    @property
    def stream_id(self):
        """str: the stream id of the operator."""
        return self._stream_id

    @property
    def op_name(self):
        """str: the name of the operator (empty until set)."""
        return self._op_name

    @op_name.setter
    def op_name(self, name):
        """Assign the operator name once it has been resolved."""
        self._op_name = name

    @property
    def duration(self):
        """The duration of the operator execution (None until set)."""
        return self._duration

    @duration.setter
    def duration(self, value):
        """Assign the execution duration of the operator."""
        self._duration = value
class TimelineContainer:
    """
    A container of operator computation metadata.

    Args:
        split_list (list): The split list of metadata in op_compute output file.
    """

    def __init__(self, split_list):
        self._op_name = split_list[0]
        self._stream_id = int(split_list[1])
        self._start_time = float(split_list[2])
        self._duration = float(split_list[3])
        # The pid column is optional: present only in five-column records.
        self._pid = int(split_list[4]) if len(split_list) == 5 else None

    @property
    def op_name(self):
        """str: the name of the operator."""
        return self._op_name

    @property
    def stream_id(self):
        """int: the stream id of the operator."""
        return self._stream_id

    @property
    def start_time(self):
        """float: the execution start time of the operator."""
        return self._start_time

    @property
    def duration(self):
        """float: the duration of the operator execution."""
        return self._duration

    @property
    def pid(self):
        """The pid of the operator execution, or None if absent."""
        return self._pid

File diff suppressed because it is too large Load Diff

@ -0,0 +1,109 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The parser for hwts log file."""
import os
import struct
from mindspore.profiler.common.util import fwrite_format, get_file_join_name
from mindspore import log as logger
class HWTSLogParser:
    """
    The Parser for hwts log files.

    Args:
        input_path (str): The profiling job path. Such as: '/var/log/npu/profiling/JOBAIFGJEJFEDCBAEADIFJAAAAAAAAAA".
        output_filename (str): The output data path and name. Such as: './output_format_data_hwts_0.txt'.
    """

    _source_file_target = 'hwts.log.data.45.dev.profiler_default_tag'
    _dst_file_title = 'title:45 HWTS data'
    _dst_file_column_title = 'Type cnt Core_ID Block_ID Task_ID Cycle_counter Stream_ID'

    def __init__(self, input_path, output_filename):
        self._input_path = input_path
        self._output_filename = output_filename
        # Fixed: the attribute was misspelled `_source_flie_name`; it is only
        # referenced inside this class, so the rename is safe.
        self._source_file_name = self._get_source_file()

    def _get_source_file(self):
        """Get hwts log file name, which was created by ada service.

        Raises:
            RuntimeError: If no hwts log file can be found under the profiling
                directory or its "data" sub-directory.
        """
        file_name = get_file_join_name(self._input_path, self._source_file_target)
        if not file_name:
            data_path = os.path.join(self._input_path, "data")
            file_name = get_file_join_name(data_path, self._source_file_target)
            if not file_name:
                msg = "Fail to find hwts log file, under profiling directory"
                raise RuntimeError(msg)
        return file_name

    def execute(self):
        """
        Execute the parser, get result data, and write it to the output file.

        Returns:
            bool, whether succeed to analyse hwts log.
        """
        # struct formats for the payloads that follow the common 8-byte header:
        # one for log types 0-2, one for type 3, one for type 4.
        content_format = ['QIIIIIIIIIIII', 'QIIQIIIIIIII', 'IIIIQIIIIIIII']
        log_type = ['Start of task', 'End of task', 'Start of block', 'End of block', 'Block PMU']

        result_data = ""

        with open(self._source_file_name, 'rb') as hwts_data:
            while True:
                # Each HWTS record is a fixed-size 64-byte binary block.
                line = hwts_data.read(64)
                if not line:
                    break
                if not line.strip():
                    # Skip padding records consisting only of whitespace/NULs.
                    continue
                byte_first_four = struct.unpack('BBHHH', line[0:8])
                # The first byte packs the record type (low 3 bits), a warning
                # flag, and a 4-bit counter; inspect it as a binary string.
                byte_first = bin(byte_first_four[0]).replace('0b', '').zfill(8)
                ms_type = byte_first[-3:]
                is_warn_res0_ov = byte_first[4]
                cnt = int(byte_first[0:4], 2)
                core_id = byte_first_four[1]
                blk_id, task_id = byte_first_four[3], byte_first_four[4]
                if ms_type in ['000', '001', '010']:  # log type 0,1,2
                    result = struct.unpack(content_format[0], line[8:])
                    syscnt = result[0]
                    stream_id = result[1]
                elif ms_type == '011':  # log type 3
                    result = struct.unpack(content_format[1], line[8:])
                    syscnt = result[0]
                    stream_id = result[1]
                elif ms_type == '100':  # log type 4
                    result = struct.unpack(content_format[2], line[8:])
                    stream_id = result[2]
                    if is_warn_res0_ov == '0':
                        syscnt = result[4]
                    else:
                        # The cycle counter is unreliable when the warning flag is set.
                        syscnt = None
                else:
                    logger.info("Profiling: invalid hwts log record type %s", ms_type)
                    continue

                # Prefix small task ids with the stream id to keep them unique.
                if int(task_id) < 25000:
                    task_id = str(stream_id) + "_" + str(task_id)
                result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" % (log_type[int(ms_type, 2)], cnt, core_id,
                                                                          blk_id, task_id, syscnt, stream_id))

        fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
        fwrite_format(self._output_filename, data_source=self._dst_file_column_title)
        fwrite_format(self._output_filename, data_source=result_data)

        return True

File diff suppressed because it is too large Load Diff

@ -0,0 +1,88 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Minddata aicpu parser."""
import os
from mindspore.profiler.common.util import get_file_join_name, fwrite_format
from mindspore import log as logger
class MinddataParser:
"""Minddata Aicpu Parser."""
@staticmethod
def parse_minddata_aicpu_data(minddata_aicpu_source_path):
"""
Parse minddata get_next info which contains queue size and execute time.
Args:
minddata_aicpu_source_path (str): the source file path.
Returns:
list[Union[str, float]], the converted data.
"""
result = list()
try:
with open(minddata_aicpu_source_path) as source_data_file:
source_data = source_data_file.read()
step_data = source_data.split("\x00")
for one_step in step_data:
if one_step:
node_info = one_step.split(", ")
node_name, node_start, node_end, queue_size = "", 0, 0, 0
if node_info:
node_name = node_info[0].replace("Node:", "")
if len(node_info) > 2:
node_start = node_info[1].replace("Run start:", "")
if node_start.isdigit():
node_start = int(node_start)
node_end = node_info[2].replace("Run end:", "")
if node_end.isdigit():
node_end = int(node_end)
if len(node_info) > 3:
queue_size = node_info[3].replace("queue size:", "")
if queue_size.isdigit():
queue_size = int(queue_size)
one_step_list = [node_name, node_start, node_end, queue_size]
result.append(one_step_list)
except OSError:
logger.error("Open get_next profiling file error.")
return result
@staticmethod
def execute(source_path, output_path, device_id):
"""
Execute the parser.
Args:
source_path (str): the source file path.
output_path (str): the output file path.
device_id (str): the device id.
"""
col_names = ["node_name", "start_time", "end_time", "queue_size"]
minddata_aicpu_source_path = get_file_join_name(
input_path=source_path, file_name='DATA_PREPROCESS.dev.AICPUMI')
if not minddata_aicpu_source_path:
minddata_aicpu_source_path = get_file_join_name(
input_path=os.path.join(source_path, "data"), file_name='DATA_PREPROCESS.dev.AICPUMI')
if not minddata_aicpu_source_path:
return
minddata_aicpu_output_path = os.path.join(output_path, "minddata_aicpu_" + device_id + ".txt")
minddata_aicpu_data = MinddataParser.parse_minddata_aicpu_data(minddata_aicpu_source_path)
if minddata_aicpu_data:
fwrite_format(minddata_aicpu_output_path, " ".join(col_names), is_start=True)
fwrite_format(minddata_aicpu_output_path, minddata_aicpu_data, is_start=True)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,245 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Op compute time files parser."""
import os
from mindspore.profiler.common.util import fwrite_format
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
ProfilerIOException
from mindspore import log as logger
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
from mindspore.profiler.parser.container import HWTSContainer
# Header row written to the intermediate timeline file.
TIMELINE_FILE_COLUMN_TITLE = 'op_name, stream_id, start_time(ms), duration(ms)'


class OPComputeTimeParser:
    """
    Join hwts info and framework info, get op time info, and output to the result file.

    Args:
        hwts_output_file (str): The file path of hwts_output_file. Such as: './output_format_data_hwts_0.txt".
        output_filename (str): The output data file path and name. Such as: './output_op_compute_time_0.txt'.
        op_task_info (dict): The task and op relation info. The format: {task_id, [opname, stream_id, block dim]}.
    """

    _dst_file_title = 'title:op compute time'
    _dst_file_column_title = 'op_name compute_time(ms) stream_id'
    _dst_file_column_title += '\n------------ --------------- ---------'

    def __init__(self, hwts_output_file, output_filename, op_task_info,
                 output_path, device_id):
        hwts_output_file = validate_and_normalize_path(hwts_output_file)
        self._hwts_output_file = hwts_output_file
        self._output_filename = output_filename
        self._op_task_info = op_task_info
        self._output_path = output_path
        self._device_id = device_id
        # Minimum cycle counter seen across ops; exposed via the property below.
        self._min_cycle_counter = float("inf")

    def _get_op_task_id_map(self):
        """
        Read hwts data file, get the task time info.

        Returns:
            list: all hwts task time info.
        """
        op_map_result = []
        hwts_list = []

        if not os.path.exists(self._hwts_output_file):
            logger.error('The hwts output file does not exist.')
            raise ProfilerFileNotFoundException('hwts output file')

        with open(self._hwts_output_file, 'r') as data_file:
            lines = data_file.readlines()
            for line in lines:
                # Only task start/end records carry op timing information.
                if line.startswith("Start of task") or line.startswith("End of task"):
                    line_split = line.split()
                    container = HWTSContainer(line_split)
                    hwts_list.append(container)

        # hwts op map by taskId
        for hwts in hwts_list:
            if hwts.task_id in self._op_task_info.keys():
                hwts.op_name = self._op_task_info[hwts.task_id]
                op_map_result.append(hwts)

        return op_map_result

    def execute(self):
        """Execute the parser, compute all op, get op time, and write it to the output file."""
        # Calculate the execution time of operators,
        # and update the minimum cycle counter.
        tmp_result_data = self._calculate_op_execution_time()

        # Convert time units from nanoseconds to milliseconds.
        # The unit of the cycle counter is 10 nanoseconds.
        op_name_time_dict = {}
        op_name_stream_dict = {}
        op_name_count_dict = {}
        op_name_task_dict = {}
        op_name_start_time = {}
        self._convert_op_time_unit(
            tmp_result_data, op_name_time_dict, op_name_stream_dict,
            op_name_count_dict, op_name_task_dict, op_name_start_time
        )

        result_data = ""
        total_time = 0
        for op_name, time in op_name_time_dict.items():
            if op_name in op_name_stream_dict.keys():
                stream_id = op_name_stream_dict[op_name]
                # Report the average time over all executions of this op.
                avg_time = time / op_name_count_dict[op_name]
                total_time += avg_time
                result_data += ("%s %s %s\n" %(op_name, str(avg_time), stream_id))
        result_data += ("total op %s 0" %(str(total_time)))

        timeline_data = []
        for op_name, time in op_name_time_dict.items():
            if op_name in op_name_stream_dict.keys():
                stream_id = op_name_stream_dict[op_name]
                start_time_list = op_name_start_time.get(op_name)
                for (start_time, duration) in start_time_list:
                    timeline_data.append([op_name, stream_id, start_time, duration])

        # Write the metadata of operators into the file,
        # including operator name, average time, and stream id.
        self._write_op_time_into_file(result_data)
        # Write the timeline data into file,
        # including operator name, stream id, start time, and duration.
        self._write_timeline_data_into_file(timeline_data)

    def _write_op_time_into_file(self, result_data):
        """
        Write the metadata of operators into the file, including
        op name, average time, and stream id.

        Args:
            result_data (str): The metadata to be written into the file.
                'op_name_1', 'avg_time_1', 'stream_id_1',
                'op_name_2', 'avg_time_2', 'stream_id_2',
                ...
        """
        fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
        fwrite_format(self._output_filename, data_source=self._dst_file_column_title)
        fwrite_format(self._output_filename, data_source=result_data)

    def _write_timeline_data_into_file(self, timeline_data):
        """
        Write the timeline information into the file, including
        operator name, stream id, start time and duration.

        Args:
            timeline_data (list): The metadata to be written into the file.
                [
                    ['op_name_1', 'stream_id_1', 'start_time_1', 'durarion_1'],
                    ['op_name_2', 'stream_id_2', 'start_time_2', 'durarion_2'],
                    [...]
                ]
        """
        # sorted by start times
        timeline_data.sort(key=lambda x: float(x[2]))
        filename = 'output_timeline_data_{}.txt'.format(self._device_id)
        file_path = os.path.join(self._output_path, filename)
        file_path = validate_and_normalize_path(file_path)

        # write to file
        try:
            with open(file_path, 'w') as f_obj:
                f_obj.write(TIMELINE_FILE_COLUMN_TITLE + '\n')
                for timeline in timeline_data:
                    timeline = [str(item) for item in timeline]
                    f_obj.write(','.join(timeline) + '\n')
        except (IOError, OSError) as err:
            logger.error('Error occurred when writing intermediate timeline file: %s', err)
            raise ProfilerIOException

    def _calculate_op_execution_time(self):
        """
        Calculate the execution time of each operator.

        Returns:
            list, including the intermediate data of op execution time.
        """
        tmp_result_data = []
        op_map_list = self._get_op_task_id_map()

        cur_index = 0
        length = len(op_map_list)
        min_cycle_counter = float("inf")
        while cur_index < length:
            if cur_index + 1 == length:
                break

            op_start = op_map_list[cur_index]
            op_end = op_map_list[cur_index + 1]
            # A matching Start/End pair for the same op yields one duration sample;
            # otherwise advance by one and retry the pairing.
            if op_start.status == "Start" and op_end.status == "End" \
                    and op_start.op_name == op_end.op_name:
                op_start.duration = op_end.cycle_counter - op_start.cycle_counter
                tmp_result_data.append(op_start)
                cur_index += 2
                if not op_start.op_name.startswith("assign"):
                    min_cycle_counter = min(min_cycle_counter, op_start.cycle_counter)
            else:
                cur_index += 1

        # Update the value of minimum cycle counter.
        self._min_cycle_counter = min_cycle_counter / 1e5  # Convert the time unit from 10ns to 1ms

        return tmp_result_data

    def _convert_op_time_unit(self, op_data_list, op_name_time_dict, op_name_stream_dict,
                              op_name_count_dict, op_name_task_dict, op_name_start_time):
        """
        Calculate the execution time of operator and convert it into millisecond.

        The five dict arguments are populated in place.

        Args:
            op_data_list (list): The list of operator metadata.
            op_name_time_dict (dict): The mapping relation of operator name and its execution time.
            op_name_stream_dict (dict): The mapping relation of operator name and its stream id.
            op_name_count_dict (dict): The mapping relation of operator name and its count.
            op_name_task_dict (dict): The mapping relation of operator name and its task id.
            op_name_start_time (dict): The mapping relation of operator name and its start time.
        """
        factor = 1e5
        for item in op_data_list:
            op_name = item.op_name
            # Unit conversion: converting the cycle counter into ms.
            op_start_time_str = str(item.cycle_counter / factor)
            op_duration = item.duration / factor
            op_duration_str = str(item.duration / factor)
            if op_name in op_name_time_dict.keys():
                op_name_time_dict[op_name] += op_duration
                # Only executions on the op's originally recorded task id are
                # counted and given timeline entries.
                if item.task_id == op_name_task_dict[op_name]:
                    op_name_count_dict[op_name] += 1
                    op_name_start_time[op_name].append(
                        (op_start_time_str, op_duration_str)
                    )
            else:
                op_name_time_dict[op_name] = op_duration
                op_name_stream_dict[op_name] = item.stream_id
                op_name_task_dict[op_name] = item.task_id
                op_name_count_dict[op_name] = 1
                op_name_start_time[op_name] = []
                op_name_start_time[op_name].append(
                    (op_start_time_str, op_duration_str)
                )

    @property
    def min_cycle_counter(self):
        """Get minimum cycle counter."""
        return self._min_cycle_counter

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save