parent
180b3029e5
commit
ed9cc50551
@ -0,0 +1,27 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
Profiler Module Introduction.
|
||||
|
||||
This module provides Python APIs to enable the profiling of MindSpore neural networks.
|
||||
Users can import the mindspore.profiler.Profiler, initialize the Profiler object to start profiling,
|
||||
and use Profiler.analyse() to stop profiling and analyse the results.
|
||||
To visualize the profiling results, users can open mindspore Web, find the corresponding run
|
||||
and click the profile link.
|
||||
Now, Profiler supports the AICore operator analysis.
|
||||
"""
|
||||
from mindspore.profiler.profiling import Profiler
|
||||
|
||||
__all__ = ["Profiler"]
|
@ -0,0 +1,14 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
@ -0,0 +1,14 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
@ -0,0 +1,85 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Profiler error code and messages."""
|
||||
from enum import unique, Enum
|
||||
|
||||
|
||||
# Error-code category masks. Each mask occupies the bits above bit 7, so a
# full error code is "category mask | per-category number" (the numbers used
# below are all < 128 and therefore never collide with the mask bits).
_GENERAL_MASK = 0b00001 << 7
_PARSER_MASK = 0b00010 << 7
_ANALYSER_MASK = 0b00011 << 7
|
||||
|
||||
|
||||
class ProfilerMgrErrors(Enum):
    """Base enum for profiler errors; concrete error codes subclass this (it has no members, so subclassing is allowed)."""
|
||||
|
||||
@unique
class ProfilerErrors(ProfilerMgrErrors):
    """Profiler error codes.

    Each value combines a per-category number (low bits) with a category
    mask (_GENERAL_MASK / _PARSER_MASK / _ANALYSER_MASK), so a numeric code
    identifies both the sub-module that raised the error and the specific
    error within it.
    """
    # general error code
    PARAM_VALUE_ERROR = 0 | _GENERAL_MASK
    PATH_ERROR = 1 | _GENERAL_MASK
    PARAM_TYPE_ERROR = 2 | _GENERAL_MASK
    DIR_NOT_FOUND_ERROR = 3 | _GENERAL_MASK
    FILE_NOT_FOUND_ERROR = 4 | _GENERAL_MASK
    IO_ERROR = 5 | _GENERAL_MASK

    # parser error code
    DEVICE_ID_MISMATCH_ERROR = 0 | _PARSER_MASK
    RAW_FILE_ERROR = 1 | _PARSER_MASK
    STEP_NUM_NOT_SUPPORTED_ERROR = 2 | _PARSER_MASK
    JOB_ID_MISMATCH_ERROR = 3 | _PARSER_MASK

    # analyser error code
    COLUMN_NOT_EXIST_ERROR = 0 | _ANALYSER_MASK
    ANALYSER_NOT_EXIST_ERROR = 1 | _ANALYSER_MASK
    DEVICE_ID_ERROR = 2 | _ANALYSER_MASK
    OP_TYPE_ERROR = 3 | _ANALYSER_MASK
    GROUP_CONDITION_ERROR = 4 | _ANALYSER_MASK
    SORT_CONDITION_ERROR = 5 | _ANALYSER_MASK
    FILTER_CONDITION_ERROR = 6 | _ANALYSER_MASK
    COLUMN_NOT_SUPPORT_SORT_ERROR = 7 | _ANALYSER_MASK
    PIPELINE_OP_NOT_EXIST_ERROR = 8 | _ANALYSER_MASK
|
||||
|
||||
|
||||
|
||||
|
||||
@unique
class ProfilerErrorMsg(Enum):
    """Profiler error messages.

    Member names mirror those of ProfilerErrors; '{}' placeholders are
    filled in with context when a message is formatted.
    """
    # general error msg
    PARAM_VALUE_ERROR = 'Param value error. {}'
    PATH_ERROR = 'Path error. {}'
    PARAM_TYPE_ERROR = 'Param type error. {}'
    DIR_NOT_FOUND_ERROR = 'The dir <{}> not found.'
    FILE_NOT_FOUND_ERROR = 'The file <{}> not found.'
    IO_ERROR = 'Read or write file fail.'

    # parser error msg
    DEVICE_ID_MISMATCH_ERROR = 'The device ID mismatch.'
    RAW_FILE_ERROR = 'Raw file error. {}'
    STEP_NUM_NOT_SUPPORTED_ERROR = 'The step num must be in {}'
    JOB_ID_MISMATCH_ERROR = 'The job id in the parameter is not the same as ' \
                            'in the training trace file. '

    # analyser error msg
    COLUMN_NOT_EXIST_ERROR = 'The column {} does not exist.'
    ANALYSER_NOT_EXIST_ERROR = 'The analyser {} does not exist.'
    # NOTE(review): member name is misspelled ('DEIVICE' vs
    # ProfilerErrors.DEVICE_ID_ERROR). Renaming would break existing
    # references, so the typo is only flagged here.
    DEIVICE_ID_ERROR = 'The device_id in search_condition error, {}'
    FILTER_CONDITION_ERROR = 'The filter_condition in search_condition error, {}'
    OP_TYPE_ERROR = 'The op_type in search_condition error, {}'
    GROUP_CONDITION_ERROR = 'The group_condition in search_condition error, {}'
    SORT_CONDITION_ERROR = 'The sort_condition in search_condition error, {}'
    COLUMN_NOT_SUPPORT_SORT_ERROR = 'The column {} does not support to sort.'
    PIPELINE_OP_NOT_EXIST_ERROR = 'The minddata pipeline operator {} does not exist.'
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,14 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
@ -0,0 +1,26 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Profiler check parameters."""
|
||||
def check_bool(input_param, param_name):
    """Return *input_param* unchanged when it is a bool, else raise TypeError."""
    if not isinstance(input_param, bool):
        raise TypeError("Parameter {}: input type must be bool!".format(param_name))
    return input_param
|
||||
|
||||
def check_subgraph(subgraph):
    """Return *subgraph* when it names a supported subgraph, else raise ValueError."""
    # Membership test kept on a tuple so non-hashable inputs still just fail
    # the check instead of raising TypeError.
    if subgraph not in ("all", "Default", "Gradients"):
        raise ValueError("subgraph must be all or Default or Gradients, but got {}.".format(subgraph))
    return subgraph
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,60 @@
|
||||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Validate the input path."""
|
||||
import os
|
||||
|
||||
|
||||
def validate_and_normalize_path(
        path,
        check_absolute_path=False,
        allow_parent_dir=False,
):
    """
    Validates path and returns its normalized form.

    If path has a valid scheme, treat path as url, otherwise consider path a
    unix local path.

    Note:
        File scheme (rfc8089) is currently not supported.

    Args:
        path (str): Path to be normalized.
        check_absolute_path (bool): Whether check path scheme is supported.
        allow_parent_dir (bool): Whether allow parent dir in path.

    Returns:
        str, normalized path.

    Raises:
        RuntimeError: If the path is empty, contains a forbidden '..'
            component, is not absolute when required, or cannot be resolved.
    """
    if not path:
        raise RuntimeError("The path is invalid!")

    path_str = str(path)

    # Reject any ".." component unless parent references are explicitly allowed.
    if not allow_parent_dir and ".." in path_str.split("/"):
        raise RuntimeError("The path is invalid!")

    # Optionally require a unix-style absolute path.
    if check_absolute_path and not path_str.startswith("/"):
        raise RuntimeError("The path is invalid!")

    try:
        # Resolve symlinks and normalize separators / relative parts.
        return os.path.realpath(path)
    except ValueError:
        raise RuntimeError("The path is invalid!")
|
@ -0,0 +1,14 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
@ -0,0 +1,175 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""
|
||||
The parser for AI CPU preprocess data.
|
||||
"""
|
||||
import os
|
||||
|
||||
from mindspore.profiler.common.util import fwrite_format, get_file_join_name
|
||||
from mindspore import log as logger
|
||||
|
||||
|
||||
class DataPreProcessParser:
    """
    The Parser for AI CPU preprocess data.

    Reads the AI CPU profiling source file created by the ada service,
    extracts per-kernel timing rows and writes them to the output file.

    Args:
        input_path(str): The profiling job path.
        output_filename(str): The output data path and name.

    """

    # Prefix of the source file created by the ada service.
    _source_file_target = 'DATA_PREPROCESS.dev.AICPU.'
    _dst_file_title = 'title:DATA_PREPROCESS AICPU'
    _dst_file_column_title = ['serial_number', 'node_type_name', 'total_time(ms)',
                              'dispatch_time(ms)', 'run_start', 'run_end']
    # Divisor used to convert source time values to milliseconds.
    _ms_unit = 1000

    def __init__(self, input_path, output_filename):
        self._input_path = input_path
        self._output_filename = output_filename
        self._source_file_name = self._get_source_file()
        # Expected comma-split field counts for the two node-record layouts
        # and for a thread record.
        self._ms_kernel_flag = 3
        self._other_kernel_flag = 6
        self._thread_flag = 7
        # Index of the 'run end' field for each node-record layout.
        self._ms_kernel_run_end_index = 2
        self._other_kernel_run_end_index = 5
        # Parsed rows, kept for later timeline queries.
        self._result_list = []
        self._min_cycle_counter = float('inf')

    def _get_source_file(self):
        """Get log file name, which was created by ada service."""
        file_name = get_file_join_name(self._input_path, self._source_file_target)
        if not file_name:
            # Fall back to the 'data' subdirectory of the profiling job path.
            data_path = os.path.join(self._input_path, "data")
            file_name = get_file_join_name(data_path, self._source_file_target)
        return file_name

    def _get_kernel_result(self, number, node_list, thread_list):
        """
        Get the profiling data from one AI CPU kernel record.

        Args:
            number (int): Serial number assigned to this kernel.
            node_list (list): Comma-split fields of the 'Node' line.
            thread_list (list): Comma-split fields of the 'Thread' line.

        Returns:
            list, [number, node_type_name, total_time, dispatch_time,
            run_start, run_end], or None when the record layout is not
            recognised or a field is missing.
        """
        try:
            if len(node_list) == self._ms_kernel_flag and len(thread_list) == self._thread_flag:
                node_type_name = node_list[0].split(':')[-1]
                run_end_index = self._ms_kernel_run_end_index
            elif len(node_list) == self._other_kernel_flag and len(thread_list) == self._thread_flag:
                # Full node path: keep only the last path component, before
                # its '-' suffix.
                node_type_name = node_list[0].split(':')[-1].split('/')[-1].split('-')[0]
                run_end_index = self._other_kernel_run_end_index
            else:
                logger.warning("the data format can't support 'node_list':%s", str(node_list))
                return None

            run_start = node_list[1].split(':')[-1].split(' ')[0]
            run_end = node_list[run_end_index].split(':')[-1].split(' ')[0]
            # Convert the '='-suffixed time fields to milliseconds.
            total_time = float(thread_list[-1].split('=')[-1].split()[0]) / self._ms_unit
            dispatch_time = float(thread_list[-2].split('=')[-1].split()[0]) / self._ms_unit

            return [number, node_type_name, total_time, dispatch_time,
                    run_start, run_end]
        except IndexError as e:
            logger.error(e)
            return None

    def execute(self):
        """Execute the parser, get result data, and write it to the output file."""

        # NOTE(review): _get_source_file may return a falsy value when no file
        # is found; os.path.exists('') is False, but a None return would raise
        # TypeError here -- confirm get_file_join_name's failure return value.
        if not os.path.exists(self._source_file_name):
            logger.info("Did not find the aicpu profiling source file")
            return

        with open(self._source_file_name, 'rb') as ai_cpu_data:
            # NUL separators (and NUL-terminated lines) are rewritten to the
            # unique ' ___ ' marker so the stringified bytes can be split into
            # records; [2:-1] strips the b'...' repr wrapper.
            ai_cpu_str = str(ai_cpu_data.read().replace(b'\n\x00', b' ___ ')
                             .replace(b'\x00', b' ___ '))[2:-1]
            ai_cpu_lines = ai_cpu_str.split(" ___ ")

        result_list = list()
        ai_cpu_total_time_summary = 0
        # Node serial number.
        serial_number = 1
        # Walk consecutive line pairs: a 'Node' line followed by its 'Thread' line.
        for i in range(len(ai_cpu_lines) - 1):
            node_line = ai_cpu_lines[i]
            thread_line = ai_cpu_lines[i + 1]
            if "Node" in node_line and "Thread" in thread_line:
                # Get the node data from node_line
                node_list = node_line.split(',')
                thread_list = thread_line.split(',')
                result = self._get_kernel_result(serial_number, node_list, thread_list)

                if result is None:
                    continue

                result_list.append(result)
                # Calculate the total time.
                total_time = result[2]
                ai_cpu_total_time_summary += total_time
                # Increase node serial number.
                serial_number += 1
            elif "Node" in node_line and "Thread" not in thread_line:
                node_type_name = node_line.split(',')[0].split(':')[-1]
                logger.warning("The node type:%s cannot find thread data", node_type_name)

        if result_list:
            ai_cpu_total_time = format(ai_cpu_total_time_summary, '.6f')
            # Summary row appended after the per-kernel rows.
            result_list.append(["AI CPU Total Time(ms):", ai_cpu_total_time])
            fwrite_format(self._output_filename, " ".join(self._dst_file_column_title), is_start=True, is_print=True)
            fwrite_format(self._output_filename, result_list, is_print=True)

        # For timeline display.
        self._result_list = result_list

    def query_aicpu_data(self):
        """
        Get execution time of AI CPU operator.

        Returns:
            a dict, the metadata of AI CPU operator execution time.
        """
        stream_id = 0  # Default stream id for AI CPU.
        pid = 9000  # Default pid for AI CPU.
        factor = 1000  # Convert time unit from 1us to 1ms
        total_time = 0
        min_cycle_counter = float('inf')
        aicpu_info = []
        op_count_list = []
        for aicpu_item in self._result_list:
            # Skip (but record) the trailing summary row appended by execute().
            if "AI CPU Total Time(ms):" in aicpu_item:
                total_time = aicpu_item[-1]
                continue

            op_name = aicpu_item[1]
            start_time = float(aicpu_item[4]) / factor
            min_cycle_counter = min(min_cycle_counter, start_time)
            end_time = float(aicpu_item[5]) / factor
            duration = end_time - start_time
            aicpu_info.append([op_name, stream_id, start_time, duration, pid])

            # Record the number of operator types.
            if op_name not in op_count_list:
                op_count_list.append(op_name)

        self._min_cycle_counter = min_cycle_counter
        aicpu_dict = {
            'info': aicpu_info,
            'total_time': float(total_time),
            'op_exe_times': len(aicpu_info),
            'num_of_ops': len(op_count_list),
            'num_of_streams': 1
        }

        return aicpu_dict

    @property
    def min_cycle_counter(self):
        """Get minimum cycle counter in AI CPU."""
        return self._min_cycle_counter
|
@ -0,0 +1,113 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""The container of metadata used in profiler parser."""
|
||||
|
||||
|
||||
class HWTSContainer:
    """
    HWTS output container.

    Wraps the fields of one line of the HWTS output file; the operator name
    and duration are attached later while matching records to ops.

    Args:
        split_list (list): The split list of metadata in HWTS output file.
    """
    def __init__(self, split_list):
        status, task_id, cycle_counter, stream_id = (
            split_list[0],
            split_list[6],
            split_list[7],
            split_list[8],
        )
        self._status = status
        self._task_id = task_id
        self._cycle_counter = float(cycle_counter)
        self._stream_id = stream_id
        # Filled in later while the HWTS record is matched to a framework op.
        self._op_name = ''
        self._duration = None

    @property
    def op_name(self):
        """Name of the operator this record belongs to."""
        return self._op_name

    @op_name.setter
    def op_name(self, name):
        """Attach the operator name to this record."""
        self._op_name = name

    @property
    def duration(self):
        """Execution duration of the operator."""
        return self._duration

    @duration.setter
    def duration(self, value):
        """Attach the execution duration to this record."""
        self._duration = value

    @property
    def status(self):
        """Record status, i.e. Start or End."""
        return self._status

    @property
    def task_id(self):
        """Task id of the operator."""
        return self._task_id

    @property
    def cycle_counter(self):
        """Cycle counter value of this record."""
        return self._cycle_counter

    @property
    def stream_id(self):
        """Stream id of the operator."""
        return self._stream_id
|
||||
|
||||
|
||||
class TimelineContainer:
    """
    A container of operator computation metadata.

    Args:
        split_list (list): The split list of metadata in op_compute output file.
    """
    def __init__(self, split_list):
        op_name, stream_id, start, dur = (
            split_list[0], split_list[1], split_list[2], split_list[3])
        self._op_name = op_name
        self._stream_id = int(stream_id)
        self._start_time = float(start)
        self._duration = float(dur)
        # A fifth field, when present, carries the pid of the execution.
        self._pid = int(split_list[4]) if len(split_list) == 5 else None

    @property
    def op_name(self):
        """Name of the operator."""
        return self._op_name

    @property
    def stream_id(self):
        """Stream id of the operator."""
        return self._stream_id

    @property
    def start_time(self):
        """Execution start time of the operator."""
        return self._start_time

    @property
    def duration(self):
        """Execution duration of the operator."""
        return self._duration

    @property
    def pid(self):
        """Pid of the operator execution, or None when absent."""
        return self._pid
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,109 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""The parser for hwts log file."""
|
||||
import os
|
||||
import struct
|
||||
from mindspore.profiler.common.util import fwrite_format, get_file_join_name
|
||||
from mindspore import log as logger
|
||||
|
||||
|
||||
class HWTSLogParser:
    """
    The Parser for hwts log files.

    Args:
        input_path (str): The profiling job path. Such as: '/var/log/npu/profiling/JOBAIFGJEJFEDCBAEADIFJAAAAAAAAAA".
        output_filename (str): The output data path and name. Such as: './output_format_data_hwts_0.txt'.
    """

    # Prefix of the hwts log file created by the ada service.
    _source_file_target = 'hwts.log.data.45.dev.profiler_default_tag'
    _dst_file_title = 'title:45 HWTS data'
    _dst_file_column_title = 'Type cnt Core_ID Block_ID Task_ID Cycle_counter Stream_ID'

    def __init__(self, input_path, output_filename):
        self._input_path = input_path
        self._output_filename = output_filename
        # NOTE(review): 'flie' is a typo for 'file'; kept as-is because the
        # attribute is referenced by this name below.
        self._source_flie_name = self._get_source_file()

    def _get_source_file(self):
        """Get hwts log file name, which was created by ada service.

        Raises:
            RuntimeError: If no hwts log file is found in the job path or
                its 'data' subdirectory.
        """

        file_name = get_file_join_name(self._input_path, self._source_file_target)
        if not file_name:
            # Fall back to the 'data' subdirectory of the profiling job path.
            data_path = os.path.join(self._input_path, "data")
            file_name = get_file_join_name(data_path, self._source_file_target)
            if not file_name:
                msg = "Fail to find hwts log file, under profiling directory"
                raise RuntimeError(msg)

        return file_name

    def execute(self):
        """
        Execute the parser, get result data, and write it to the output file.

        Returns:
            bool, whether succeed to analyse hwts log.
        """

        # struct formats for the 56-byte payload after the 8-byte header;
        # which one applies depends on the record type in the first byte.
        content_format = ['QIIIIIIIIIIII', 'QIIQIIIIIIII', 'IIIIQIIIIIIII']
        log_type = ['Start of task', 'End of task', 'Start of block', 'End of block', 'Block PMU']

        result_data = ""

        with open(self._source_flie_name, 'rb') as hwts_data:
            while True:
                # Each hwts record is a fixed 64-byte binary block.
                line = hwts_data.read(64)
                if line:
                    if not line.strip():
                        continue
                else:
                    break
                # Header: flags byte, core id byte, then three uint16 fields
                # (the last two being block id and task id).
                byte_first_four = struct.unpack('BBHHH', line[0:8])
                # Flags byte as a zero-padded bit string, MSB first.
                byte_first = bin(byte_first_four[0]).replace('0b', '').zfill(8)
                # Low 3 bits select the record type.
                ms_type = byte_first[-3:]
                is_warn_res0_ov = byte_first[4]
                # High 4 bits form a counter.
                cnt = int(byte_first[0:4], 2)
                core_id = byte_first_four[1]
                blk_id, task_id = byte_first_four[3], byte_first_four[4]
                if ms_type in ['000', '001', '010']:  # log type 0,1,2
                    result = struct.unpack(content_format[0], line[8:])
                    syscnt = result[0]
                    stream_id = result[1]
                elif ms_type == '011':  # log type 3
                    result = struct.unpack(content_format[1], line[8:])
                    syscnt = result[0]
                    stream_id = result[1]
                elif ms_type == '100':  # log type 4
                    result = struct.unpack(content_format[2], line[8:])
                    stream_id = result[2]
                    # The warn/reserved/overflow flag bit decides whether the
                    # system counter field is valid for this record type.
                    if is_warn_res0_ov == '0':
                        syscnt = result[4]
                    else:
                        syscnt = None
                else:
                    logger.info("Profiling: invalid hwts log record type %s", ms_type)
                    continue

                # Small task ids are prefixed with the stream id, presumably
                # to keep them unique across streams -- TODO confirm.
                if int(task_id) < 25000:
                    task_id = str(stream_id) + "_" + str(task_id)
                result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" %(log_type[int(ms_type, 2)], cnt, core_id,
                                                                        blk_id, task_id, syscnt, stream_id))

        fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
        fwrite_format(self._output_filename, data_source=self._dst_file_column_title)
        fwrite_format(self._output_filename, data_source=result_data)

        return True
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,88 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Minddata aicpu parser."""
|
||||
import os
|
||||
|
||||
from mindspore.profiler.common.util import get_file_join_name, fwrite_format
|
||||
from mindspore import log as logger
|
||||
|
||||
|
||||
class MinddataParser:
    """Minddata Aicpu Parser."""

    @staticmethod
    def _as_int(text):
        """Return int(text) when text is all digits, else text unchanged."""
        return int(text) if text.isdigit() else text

    @staticmethod
    def parse_minddata_aicpu_data(minddata_aicpu_source_path):
        """
        Parse minddata get_next info which contains queue size and execute time.

        Args:
            minddata_aicpu_source_path (str): the source file path.

        Returns:
            list[Union[str, float]], the converted data.
        """
        parsed_rows = list()
        try:
            with open(minddata_aicpu_source_path) as source_data_file:
                raw_content = source_data_file.read()
        except OSError:
            logger.error("Open get_next profiling file error.")
            return parsed_rows

        # Records are NUL-separated; each holds ", "-separated fields.
        for one_step in raw_content.split("\x00"):
            if not one_step:
                continue
            fields = one_step.split(", ")
            node_name, node_start, node_end, queue_size = "", 0, 0, 0
            if fields:
                node_name = fields[0].replace("Node:", "")
            if len(fields) > 2:
                node_start = MinddataParser._as_int(fields[1].replace("Run start:", ""))
                node_end = MinddataParser._as_int(fields[2].replace("Run end:", ""))
            if len(fields) > 3:
                queue_size = MinddataParser._as_int(fields[3].replace("queue size:", ""))
            parsed_rows.append([node_name, node_start, node_end, queue_size])

        return parsed_rows

    @staticmethod
    def execute(source_path, output_path, device_id):
        """
        Execute the parser.

        Args:
            source_path (str): the source file path.
            output_path (str): the output file path.
            device_id (str): the device id.
        """
        col_names = ["node_name", "start_time", "end_time", "queue_size"]
        source_file = get_file_join_name(
            input_path=source_path, file_name='DATA_PREPROCESS.dev.AICPUMI')
        if not source_file:
            # Fall back to the 'data' subdirectory of the source path.
            source_file = get_file_join_name(
                input_path=os.path.join(source_path, "data"), file_name='DATA_PREPROCESS.dev.AICPUMI')
            if not source_file:
                return
        target_file = os.path.join(output_path, "minddata_aicpu_" + device_id + ".txt")

        rows = MinddataParser.parse_minddata_aicpu_data(source_file)
        if rows:
            fwrite_format(target_file, " ".join(col_names), is_start=True)
            fwrite_format(target_file, rows, is_start=True)
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,245 @@
|
||||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
"""Op compute time files parser."""
|
||||
import os
|
||||
from mindspore.profiler.common.util import fwrite_format
|
||||
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
|
||||
ProfilerIOException
|
||||
from mindspore import log as logger
|
||||
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
|
||||
from mindspore.profiler.parser.container import HWTSContainer
|
||||
|
||||
TIMELINE_FILE_COLUMN_TITLE = 'op_name, stream_id, start_time(ms), duration(ms)'
|
||||
|
||||
class OPComputeTimeParser:
    """
    Join hwts info and framework info, get op time info, and output to the result file.

    Args:
        hwts_output_file (str): The file path of hwts_output_file. Such as: './output_format_data_hwts_0.txt'.
        output_filename (str): The output data file path and name. Such as: './output_op_compute_time_0.txt'.
        op_task_info (dict): The task and op relation info. The format: {task_id, [opname, stream_id, block dim]}.
        output_path (str): The directory that receives the intermediate timeline file.
        device_id (str): The device id, used to name the timeline output file.
    """

    _dst_file_title = 'title:op compute time'
    # Two-line column header: names on the first line, dashed ruler on the second.
    _dst_file_column_title = 'op_name compute_time(ms) stream_id'
    _dst_file_column_title += '\n------------ --------------- ---------'

    def __init__(self, hwts_output_file, output_filename, op_task_info,
                 output_path, device_id):
        hwts_output_file = validate_and_normalize_path(hwts_output_file)
        self._hwts_output_file = hwts_output_file
        self._output_filename = output_filename
        self._op_task_info = op_task_info
        self._output_path = output_path
        self._device_id = device_id
        # Smallest cycle counter observed across (non-assign) ops, in ms after
        # conversion; starts at +inf and is set by _calculate_op_execution_time().
        self._min_cycle_counter = float("inf")

    def _get_op_task_id_map(self):
        """
        Read hwts data file, get the task time info.

        Returns:
            list: all hwts task time info.
        """

        op_map_result = []
        hwts_list = []

        if not os.path.exists(self._hwts_output_file):
            logger.error('The hwts output file does not exist.')
            raise ProfilerFileNotFoundException('hwts output file')

        with open(self._hwts_output_file, 'r') as data_file:
            lines = data_file.readlines()
            for line in lines:
                # Only task boundary records matter here; all other lines are skipped.
                if line.startswith("Start of task") or line.startswith("End of task"):
                    line_split = line.split()
                    container = HWTSContainer(line_split)
                    hwts_list.append(container)

        # hwts op map by taskId: keep only records whose task id is known to the
        # framework info, and attach the op name from _op_task_info.
        for hwts in hwts_list:
            if hwts.task_id in self._op_task_info.keys():
                hwts.op_name = self._op_task_info[hwts.task_id]
                op_map_result.append(hwts)

        return op_map_result

    def execute(self):
        """Execute the parser, compute all op, get op time, and write it to the output file."""
        # Calculate the execution time of operators,
        # and update the minimum cycle counter.
        tmp_result_data = self._calculate_op_execution_time()

        # Convert time units from nanoseconds to milliseconds.
        # The unit of the cycle counter is 10 nanoseconds.
        op_name_time_dict = {}      # op name -> accumulated duration (ms)
        op_name_stream_dict = {}    # op name -> stream id
        op_name_count_dict = {}     # op name -> number of invocations counted
        op_name_task_dict = {}      # op name -> task id of the first occurrence
        op_name_start_time = {}     # op name -> list of (start_time, duration) strings
        self._convert_op_time_unit(
            tmp_result_data, op_name_time_dict, op_name_stream_dict,
            op_name_count_dict, op_name_task_dict, op_name_start_time
        )

        result_data = ""
        total_time = 0
        for op_name, time in op_name_time_dict.items():
            if op_name in op_name_stream_dict.keys():
                stream_id = op_name_stream_dict[op_name]
                # Average per-invocation time; total_time sums averages, not raw time.
                avg_time = time / op_name_count_dict[op_name]
                total_time += avg_time
                result_data += ("%s %s %s\n" %(op_name, str(avg_time), stream_id))
        # Trailing summary row; the final "0" fills the stream_id column.
        result_data += ("total op %s 0" %(str(total_time)))

        timeline_data = []
        for op_name, time in op_name_time_dict.items():
            if op_name in op_name_stream_dict.keys():
                stream_id = op_name_stream_dict[op_name]
                start_time_list = op_name_start_time.get(op_name)
                for (start_time, duration) in start_time_list:
                    timeline_data.append([op_name, stream_id, start_time, duration])

        # Write the metadata of operators into the file,
        # including operator name, average time, and stream id.
        self._write_op_time_into_file(result_data)
        # Write the timeline data into file,
        # including operator name, stream id, start time, and duration.
        self._write_timeline_data_into_file(timeline_data)

    def _write_op_time_into_file(self, result_data):
        """
        Write the metadata of operators into the file, including
        op name, average time, and stream id.

        Args:
            result_data (str): The metadata to be written into the file.
                'op_name_1', 'avg_time_1', 'stream_id_1',
                'op_name_2', 'avg_time_2', 'stream_id_2',
                ...
        """
        # NOTE(review): is_start=True appears to begin a fresh file and later calls
        # append — fwrite_format is defined elsewhere; confirm its semantics.
        fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
        fwrite_format(self._output_filename, data_source=self._dst_file_column_title)
        fwrite_format(self._output_filename, data_source=result_data)

    def _write_timeline_data_into_file(self, timeline_data):
        """
        Write the timeline information into the file, including
        operator name, stream id, start time and duration.

        Args:
            timeline_data (list): The metadata to be written into the file.
                [
                    ['op_name_1', 'stream_id_1', 'start_time_1', 'duration_1'],
                    ['op_name_2', 'stream_id_2', 'start_time_2', 'duration_2'],
                    [...]
                ]

        Raises:
            ProfilerIOException: If writing the timeline file fails.
        """
        # sorted by start times (entries carry times as strings, hence the float cast)
        timeline_data.sort(key=lambda x: float(x[2]))
        filename = 'output_timeline_data_{}.txt'.format(self._device_id)
        file_path = os.path.join(self._output_path, filename)
        file_path = validate_and_normalize_path(file_path)

        # write to file
        try:
            with open(file_path, 'w') as f_obj:
                f_obj.write(TIMELINE_FILE_COLUMN_TITLE + '\n')
                for timeline in timeline_data:
                    timeline = [str(item) for item in timeline]
                    f_obj.write(','.join(timeline) + '\n')
        except (IOError, OSError) as err:
            logger.error('Error occurred when writing intermediate timeline file: %s', err)
            raise ProfilerIOException

    def _calculate_op_execution_time(self):
        """
        Calculate the execution time of each operator.

        Returns:
            list, including the intermediate data of op execution time.
        """
        tmp_result_data = []
        op_map_list = self._get_op_task_id_map()

        cur_index = 0
        length = len(op_map_list)
        min_cycle_counter = float("inf")
        while cur_index < length:
            if cur_index + 1 == length:
                break

            op_start = op_map_list[cur_index]
            op_end = op_map_list[cur_index + 1]
            # A valid measurement is an adjacent Start/End pair for the same op.
            # On a match consume both records; otherwise advance one record,
            # silently dropping the unmatched entry.
            if op_start.status == "Start" and op_end.status == "End" \
                and op_start.op_name == op_end.op_name:
                op_start.duration = op_end.cycle_counter - op_start.cycle_counter
                tmp_result_data.append(op_start)
                cur_index += 2
                # Ops whose name starts with "assign" are excluded from the minimum
                # cycle counter — presumably they execute before the measured graph;
                # TODO(review) confirm the rationale.
                if not op_start.op_name.startswith("assign"):
                    min_cycle_counter = min(min_cycle_counter, op_start.cycle_counter)
            else:
                cur_index += 1

        # Update the value of minimum cycle counter.
        self._min_cycle_counter = min_cycle_counter / 1e5 # Convert the time unit from 10ns to 1ms

        return tmp_result_data

    def _convert_op_time_unit(self, op_data_list, op_name_time_dict, op_name_stream_dict,
                              op_name_count_dict, op_name_task_dict, op_name_start_time):
        """
        Calculate the execution time of operator and convert it into millisecond.

        All output dicts are mutated in place.

        Args:
            op_data_list (list): The list of operator metadata.
            op_name_time_dict (dict): The mapping relation of operator name and its execution time.
            op_name_stream_dict (dict): The mapping relation of operator name and its stream id.
            op_name_count_dict (dict): The mapping relation of operator name and its count.
            op_name_task_dict (dict): The mapping relation of operator name and its task id.
            op_name_start_time (dict): The mapping relation of operator name and its start time.
        """
        # The cycle counter unit is 10 ns, so dividing by 1e5 yields milliseconds.
        factor = 1e5
        for item in op_data_list:
            op_name = item.op_name
            # Unit conversion: converting the cycle counter into ms.
            op_start_time_str = str(item.cycle_counter / factor)
            op_duration = item.duration / factor
            op_duration_str = str(item.duration / factor)
            if op_name in op_name_time_dict.keys():
                op_name_time_dict[op_name] += op_duration
                # Only occurrences sharing the first-seen task id increment the count
                # and record a start time — repeated invocations of the same task.
                # NOTE(review): occurrences with a different task id still add to the
                # accumulated time above but not to the count; confirm this is intended.
                if item.task_id == op_name_task_dict[op_name]:
                    op_name_count_dict[op_name] += 1
                    op_name_start_time[op_name].append(
                        (op_start_time_str, op_duration_str)
                    )

            else:
                # First occurrence of this op: initialize all per-op records.
                op_name_time_dict[op_name] = op_duration
                op_name_stream_dict[op_name] = item.stream_id
                op_name_task_dict[op_name] = item.task_id
                op_name_count_dict[op_name] = 1
                op_name_start_time[op_name] = []
                op_name_start_time[op_name].append(
                    (op_start_time_str, op_duration_str)
                )

    @property
    def min_cycle_counter(self):
        """Get minimum cycle counter (in ms; +inf if no op was measured)."""
        return self._min_cycle_counter
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue