|
|
@ -42,9 +42,10 @@ class BaseStepTraceParser:
|
|
|
|
output_file_path (str): The output file path.
|
|
|
|
output_file_path (str): The output file path.
|
|
|
|
job_id (int): The job id used to define the start of new step. Default: 0.
|
|
|
|
job_id (int): The job id used to define the start of new step. Default: 0.
|
|
|
|
skip_first_step (bool): Whether skip the first step or not.
|
|
|
|
skip_first_step (bool): Whether skip the first step or not.
|
|
|
|
|
|
|
|
is_training_mode (bool): Whether in training mode or not.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False):
|
|
|
|
def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False, is_training_mode=True):
|
|
|
|
self._input_dir = input_dir
|
|
|
|
self._input_dir = input_dir
|
|
|
|
self._output_path = output_file_path
|
|
|
|
self._output_path = output_file_path
|
|
|
|
self._job_id = job_id
|
|
|
|
self._job_id = job_id
|
|
|
@ -53,6 +54,7 @@ class BaseStepTraceParser:
|
|
|
|
self._header = []
|
|
|
|
self._header = []
|
|
|
|
self._step_num = 0
|
|
|
|
self._step_num = 0
|
|
|
|
self._tag_map = {}
|
|
|
|
self._tag_map = {}
|
|
|
|
|
|
|
|
self._is_training_mode = is_training_mode
|
|
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
@property
|
|
|
|
def output_file(self):
|
|
|
|
def output_file(self):
|
|
|
@ -64,7 +66,7 @@ class BaseStepTraceParser:
|
|
|
|
"""The property of step trace info."""
|
|
|
|
"""The property of step trace info."""
|
|
|
|
summary_info = {}
|
|
|
|
summary_info = {}
|
|
|
|
if self._result:
|
|
|
|
if self._result:
|
|
|
|
summary_info = get_summary_for_step_trace(self._result[-1], self._header)
|
|
|
|
summary_info = get_summary_for_step_trace(self._result[-1], self._header, self._is_training_mode)
|
|
|
|
summary_info['total_steps'] = len(self._result) - 1
|
|
|
|
summary_info['total_steps'] = len(self._result) - 1
|
|
|
|
print('\nStep trace summary info (unit: syscnt):')
|
|
|
|
print('\nStep trace summary info (unit: syscnt):')
|
|
|
|
print(summary_info)
|
|
|
|
print(summary_info)
|
|
|
@ -321,15 +323,27 @@ class BaseStepTraceParser:
|
|
|
|
log.info("Finish add average info for step trace.")
|
|
|
|
log.info("Finish add average info for step trace.")
|
|
|
|
|
|
|
|
|
|
|
|
def _save(self):
    """
    Save the step trace header and result rows to a CSV file.

    The header in ``self._header`` is written first, followed by every row
    of ``self._result``. Nothing is written when the header is empty. After
    the file is closed its permission is set to owner-read-only.

    When ``self._is_training_mode`` is false, the layout is reduced before
    writing: the second-to-last column is renamed to 'fp' and, per row,
    receives the sum of itself and the last column; the column at index
    ``BP_POINT`` and the last column are then dropped.

    Raises:
        ProfilerIOException: If writing the file or changing its permission fails.
    """
    # Column positions used to reduce the layout in non-training mode.
    # Presumably these are the bp point, tail and fp duration columns --
    # TODO confirm against the code that builds the header.
    BP_POINT, TAIL, FP_DURATION = 5, -1, -2
    log.info("Start to save step trace file.")
    if not self._header:
        return
    try:
        # The csv module requires the file to be opened with newline='' so
        # that the writer's own '\r\n' terminators are not translated again
        # (which yields blank lines between rows on some platforms).
        with open(self._output_path, 'w', newline='') as file_handle:
            csv_writer = csv.writer(file_handle)
            if not self._is_training_mode:
                # NOTE(review): this rewrites self._header permanently, so a
                # second call would trim the header again. Kept as-is because
                # the header is also consumed outside this method -- confirm
                # before changing.
                self._header[FP_DURATION] = 'fp'
                self._header = self._header[:BP_POINT] + self._header[BP_POINT+1:TAIL]
            csv_writer.writerow(self._header)
            for row_data in self._result:
                if not self._is_training_mode:
                    # The accumulation updates the stored row in place; the
                    # slice below only affects the row that is written out.
                    row_data[FP_DURATION] += row_data[TAIL]
                    row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL]
                csv_writer.writerow(row_data)
        os.chmod(self._output_path, stat.S_IRUSR)
    except (IOError, OSError) as err:
        log.warning('Failed to save step trace raw info. %s', err)
        # Chain the low-level error so the root cause stays in the traceback.
        raise ProfilerIOException from err
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GpuStepTraceParser(BaseStepTraceParser):
|
|
|
|
class GpuStepTraceParser(BaseStepTraceParser):
|
|
|
@ -356,10 +370,16 @@ class GpuStepTraceParser(BaseStepTraceParser):
|
|
|
|
log.warning(f'Failed to read {source_file}', err)
|
|
|
|
log.warning(f'Failed to read {source_file}', err)
|
|
|
|
raise ProfilerIOException
|
|
|
|
raise ProfilerIOException
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if self._is_training_mode:
|
|
|
|
points = {
|
|
|
|
points = {
|
|
|
|
'fp_start': fp_start_name,
|
|
|
|
'fp_start': fp_start_name,
|
|
|
|
'bp_end': bp_end_name
|
|
|
|
'bp_end': bp_end_name
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
points = {
|
|
|
|
|
|
|
|
'fp_start': fp_start_name,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
with open(output_path, 'w') as json_file:
|
|
|
|
with open(output_path, 'w') as json_file:
|
|
|
|
json.dump(points, json_file)
|
|
|
|
json.dump(points, json_file)
|
|
|
@ -456,10 +476,16 @@ class AscendStepTraceParser(BaseStepTraceParser):
|
|
|
|
Returns:
|
|
|
|
Returns:
|
|
|
|
dict, parsed point info.
|
|
|
|
dict, parsed point info.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
if self._is_training_mode:
|
|
|
|
points = {
|
|
|
|
points = {
|
|
|
|
'fp_start': point_info.get(self._fp_tag, ''),
|
|
|
|
'fp_start': point_info.get(self._fp_tag, ''),
|
|
|
|
'bp_end': point_info.get(self._bp_tag, '')
|
|
|
|
'bp_end': point_info.get(self._bp_tag, '')
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
points = {
|
|
|
|
|
|
|
|
'fp_start': point_info.get(self._fp_tag, ''),
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
with open(output_path, 'w') as json_file:
|
|
|
|
with open(output_path, 'w') as json_file:
|
|
|
|
json.dump(points, json_file)
|
|
|
|
json.dump(points, json_file)
|
|
|
|