From de3a653c57611c340349ec8fc53a8c5b3a9752eb Mon Sep 17 00:00:00 2001
From: gzhcv
Date: Mon, 7 Dec 2020 10:27:59 +0800
Subject: [PATCH] fix incorrect step_trace content in the inference scenario

---
 .../ccsrc/profiler/device/gpu/data_saver.cc   |  2 +-
 .../device/gpu/gpu_profiling_utils.cc         |  1 -
 .../runtime/device/gpu/gpu_kernel_runtime.cc  |  2 +-
 mindspore/profiler/common/util.py             | 26 ++++++---
 mindspore/profiler/parser/integrator.py       | 20 ++++++-
 .../profiler/parser/step_trace_parser.py      | 58 ++++++++++++++-----
 mindspore/profiler/profiling.py               | 16 +++--
 7 files changed, 92 insertions(+), 33 deletions(-)

diff --git a/mindspore/ccsrc/profiler/device/gpu/data_saver.cc b/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
index fb74820538..5703c9e0e8 100644
--- a/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/data_saver.cc
@@ -273,7 +273,7 @@ void DataSaver::WriteStepTrace(const std::string &saver_base_dir) {
   }
 
   // write step trace time info into file
-  uint32_t factor = 10;
+  const uint32_t factor = 10;
   std::vector<std::string> op_name_arr;
   op_name_arr.push_back(step_trace_op_name.trace_fp_start);
   op_name_arr.push_back(step_trace_op_name.trace_bp_end);
diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling_utils.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling_utils.cc
index b8a7a59a5a..b0bfb40a0c 100644
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling_utils.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling_utils.cc
@@ -170,7 +170,6 @@ std::string ProfilingUtils::GetGraphSecondLastKernelName(const std::vectorgraph_id()]) {
+  if (profiler_inst->GetEnableFlag() && is_first_step_map_[graph->graph_id()]) {
     profiler::gpu::ProfilingTraceInfo profiling_trace =
       profiler::gpu::ProfilingUtils::GetProfilingTraceFromEnv(NOT_NULL(graph));
     profiler_inst->SetStepTraceOpName(profiling_trace);
diff --git a/mindspore/profiler/common/util.py b/mindspore/profiler/common/util.py
index 180d163ff2..3860597069 100644
--- a/mindspore/profiler/common/util.py
+++ b/mindspore/profiler/common/util.py
@@ -233,24 +233,34 @@ def query_step_trace_file(profiler_dir):
     return None
 
 
-def get_summary_for_step_trace(average_info, header):
+def get_summary_for_step_trace(average_info, header, is_training_mode=True):
     """The property of summary info."""
     if not average_info or not header:
         return {}
     total_time = get_field_value(average_info, 'total', header)
     iteration_interval = get_field_value(average_info, 'iteration_interval', header)
-    fp_and_bp = get_field_value(average_info, 'fp_and_bp', header)
-    tail = get_field_value(average_info, 'tail', header)
-    summary = {
+    summary_part = {
         'total_time': total_time,
         'iteration_interval': iteration_interval,
         'iteration_interval_percent': calculate_percent(iteration_interval, total_time),
-        'fp_and_bp': fp_and_bp,
-        'fp_and_bp_percent': calculate_percent(fp_and_bp, total_time),
-        'tail': tail,
-        'tail_percent': calculate_percent(tail, total_time)
     }
+    if is_training_mode:
+        fp_and_bp = get_field_value(average_info, 'fp_and_bp', header)
+        tail = get_field_value(average_info, 'tail', header)
+        summary = {
+            'fp_and_bp': fp_and_bp,
+            'fp_and_bp_percent': calculate_percent(fp_and_bp, total_time),
+            'tail': tail,
+            'tail_percent': calculate_percent(tail, total_time)
+        }
+    else:
+        fp = get_field_value(average_info, 'fp', header)
+        summary = {
+            'fp': fp,
+            'fp_percent': calculate_percent(fp, total_time)
+        }
+    summary.update(summary_part)
     return summary
 
 
diff --git a/mindspore/profiler/parser/integrator.py b/mindspore/profiler/parser/integrator.py
index ce96f77b51..92767d2a6b 100644
--- a/mindspore/profiler/parser/integrator.py
+++ b/mindspore/profiler/parser/integrator.py
@@ -21,7 +21,7 @@ from decimal import Decimal
 
 from mindspore import log as logger
 from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
-    ProfilerFileNotFoundException, ProfilerRawFileException
+    ProfilerFileNotFoundException, ProfilerRawFileException, ProfilerParamValueErrorException
 from mindspore.profiler.common.util import query_latest_trace_time_file, to_int, to_millisecond
 from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
 from mindspore.profiler.parser.container import TimelineContainer
@@ -776,6 +776,24 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
         # Update timeline summary info
         self._timeline_summary['num_of_streams'] += len(stream_count_dict.keys())
 
+    def check_op_name(self, op_name):
+        """
+        Check whether the operator name exists.
+
+        Args:
+            op_name (str): The operator name or operator name prefix.
+
+        Returns:
+            bool, `True` if the operator name does exist, else `False`.
+        """
+        if not op_name:
+            raise ProfilerParamValueErrorException('The op_name should exist.')
+        for op_time_info in self._timeline_meta:
+            full_op_name = op_time_info['name']
+            if full_op_name and full_op_name.startswith(op_name):
+                return True
+        return False
+
 class AscendTimelineGenerator(BaseTimelineGenerator):
     """Generate ascend Timeline data from file."""
     _display_filename = 'ascend_timeline_display_{}.json'
diff --git a/mindspore/profiler/parser/step_trace_parser.py b/mindspore/profiler/parser/step_trace_parser.py
index 22875fb6ad..a61537bea6 100644
--- a/mindspore/profiler/parser/step_trace_parser.py
+++ b/mindspore/profiler/parser/step_trace_parser.py
@@ -42,9 +42,10 @@ class BaseStepTraceParser:
         output_file_path (str): The output file path.
         job_id (int): The job id used to define the start of new step. Default: 0.
         skip_first_step (bool): Whether skip the first step or not.
+        is_training_mode (bool): Whether in training mode or not.
     """
 
-    def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False):
+    def __init__(self, input_dir, output_file_path, job_id=0, skip_first_step=False, is_training_mode=True):
         self._input_dir = input_dir
         self._output_path = output_file_path
         self._job_id = job_id
@@ -53,6 +54,7 @@ class BaseStepTraceParser:
         self._header = []
         self._step_num = 0
         self._tag_map = {}
+        self._is_training_mode = is_training_mode
 
     @property
     def output_file(self):
@@ -64,7 +66,7 @@ class BaseStepTraceParser:
         """The property of step trace info."""
         summary_info = {}
         if self._result:
-            summary_info = get_summary_for_step_trace(self._result[-1], self._header)
+            summary_info = get_summary_for_step_trace(self._result[-1], self._header, self._is_training_mode)
             summary_info['total_steps'] = len(self._result) - 1
         print('\nStep trace summary info (unit: syscnt):')
         print(summary_info)
@@ -321,15 +323,27 @@ class BaseStepTraceParser:
         log.info("Finish add average info for step trace.")
 
     def _save(self):
+        """Save step trace file."""
+        BP_POINT, TAIL, FP_DURATION = 5, -1, -2
         log.info("Start to save step trace file.")
         if not self._header:
             return
-        with open(self._output_path, 'w') as file_handle:
-            csv_writer = csv.writer(file_handle)
-            csv_writer.writerow(self._header)
-            for row_data in self._result:
-                csv_writer.writerow(row_data)
-        os.chmod(self._output_path, stat.S_IRUSR)
+        try:
+            with open(self._output_path, 'w') as file_handle:
+                csv_writer = csv.writer(file_handle)
+                if not self._is_training_mode:
+                    self._header[FP_DURATION] = 'fp'
+                    self._header = self._header[:BP_POINT] + self._header[BP_POINT+1:TAIL]
+                csv_writer.writerow(self._header)
+                for row_data in self._result:
+                    if not self._is_training_mode:
+                        row_data[FP_DURATION] += row_data[TAIL]
+                        row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL]
+                    csv_writer.writerow(row_data)
+            os.chmod(self._output_path, stat.S_IRUSR)
+        except (IOError, OSError) as err:
+            log.warning('Failed to save step trace raw info. %s', err)
+            raise ProfilerIOException
 
 
 class GpuStepTraceParser(BaseStepTraceParser):
@@ -356,10 +370,16 @@ class GpuStepTraceParser(BaseStepTraceParser):
             log.warning(f'Failed to read {source_file}', err)
             raise ProfilerIOException
 
-        points = {
-            'fp_start': fp_start_name,
-            'bp_end': bp_end_name
-        }
+        if self._is_training_mode:
+            points = {
+                'fp_start': fp_start_name,
+                'bp_end': bp_end_name
+            }
+        else:
+            points = {
+                'fp_start': fp_start_name,
+            }
+
         try:
             with open(output_path, 'w') as json_file:
                 json.dump(points, json_file)
@@ -456,10 +476,16 @@ class AscendStepTraceParser(BaseStepTraceParser):
         Returns:
             dict, parsed point info.
         """
-        points = {
-            'fp_start': point_info.get(self._fp_tag, ''),
-            'bp_end': point_info.get(self._bp_tag, '')
-        }
+        if self._is_training_mode:
+            points = {
+                'fp_start': point_info.get(self._fp_tag, ''),
+                'bp_end': point_info.get(self._bp_tag, '')
+            }
+        else:
+            points = {
+                'fp_start': point_info.get(self._fp_tag, ''),
+            }
+
         try:
             with open(output_path, 'w') as json_file:
                 json.dump(points, json_file)
diff --git a/mindspore/profiler/profiling.py b/mindspore/profiler/profiling.py
index 2770bafcc7..e206fee748 100644
--- a/mindspore/profiler/profiling.py
+++ b/mindspore/profiler/profiling.py
@@ -151,7 +151,7 @@ class Profiler:
             logger.error('Please check the Profiler object initialized after set_auto_parallel_context() '
                          'and init(). Profiler should be initialized after these code.')
         self._gpu_profiler.stop()
-        self._generate_timeline()
+        timeline_generator = self._generate_timeline()
 
         # parse minddata pipeline operator and queue for GPU
         try:
@@ -162,7 +162,7 @@ class Profiler:
 
         # analyse step trace info
         try:
-            self._analyse_step_trace()
+            self._analyse_step_trace(is_training_mode_flag=timeline_generator.check_op_name('Gradients'))
         except ProfilerException as err:
             logger.warning(err.message)
 
@@ -239,13 +239,14 @@ class Profiler:
         os.environ['PROFILING_MODE'] = str("false")
         context.set_context(enable_profiling=False)
 
-    def _analyse_step_trace(self, source_path=None, framework_parser=None):
+    def _analyse_step_trace(self, source_path=None, framework_parser=None, is_training_mode_flag=True):
         """
         Analyse step trace data and save the result.
 
         Args:
             source_path (str): The directory that contains the step trace original data.
            framework_parser (FrameworkParser): The framework parse instance.
+            is_training_mode_flag (bool): Whether in training mode or not.
         """
         logger.info("Begin to parse step trace.")
         # construct output path
@@ -266,19 +267,23 @@ class Profiler:
                 f'step_trace_profiling_{self._dev_id}.txt'
             )
             parser = GpuStepTraceParser(input_dir=input_file_path,
-                                        output_file_path=step_trace_intermediate_file_path)
+                                        output_file_path=step_trace_intermediate_file_path,
+                                        is_training_mode=is_training_mode_flag)
             parser.parse_and_save()
             point_info = parser.record_point_info(input_file_path, point_info_file_path)
         else:
             # whether keep the first step
             skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
             point_info = framework_parser.point_info
+            # recognize inference or training mode
+            is_training_mode_flag = framework_parser.check_op_name("Gradients")
             # parser the step trace files and save the result to disk
             source_path = validate_and_normalize_path(source_path)
             parser = AscendStepTraceParser(input_dir=source_path,
                                            output_file_path=step_trace_intermediate_file_path,
                                            job_id=self._job_id_env,
-                                           skip_first_step=skip_first_step_flag)
+                                           skip_first_step=skip_first_step_flag,
+                                           is_training_mode=is_training_mode_flag)
             parser.update_tag_op_type_map(point_info)
             parser.parse_and_save()
             point_info = parser.record_point_info(point_info, point_info_file_path)
@@ -332,6 +337,7 @@ class Profiler:
             timeline_generator.init_timeline()
             timeline_generator.write_timeline(size_limit)
             timeline_generator.write_timeline_summary()
+            return timeline_generator
         except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
             logger.warning('Fail to write timeline data: %s', err)
             raise RuntimeError('Fail to write timeline data.')
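
For illustration only (not part of the patch): a minimal standalone sketch of the inference-mode reshaping that BaseStepTraceParser._save() performs above. The header layout and the sample row below are hypothetical, chosen only so the column indices line up with the constants used in the patch (BP_POINT = 5, FP_DURATION = -2, TAIL = -1); the real header and rows are produced by the step trace parser itself.

# Hypothetical header/row, laid out so the indices match the patch's constants.
BP_POINT, TAIL, FP_DURATION = 5, -1, -2

header = ['step_num', 'start_point', 'end_point', 'total', 'fp_point',
          'bp_point', 'iteration_interval', 'fp_and_bp', 'tail']
row = [1, 100, 200, 100, 110, 180, 10, 70, 20]

# Inference has no backward pass: rename 'fp_and_bp' to 'fp', fold the tail
# duration into it, and drop the 'bp_point' and 'tail' columns.
header[FP_DURATION] = 'fp'
header = header[:BP_POINT] + header[BP_POINT + 1:TAIL]
row[FP_DURATION] += row[TAIL]
row = row[:BP_POINT] + row[BP_POINT + 1:TAIL]

print(header)  # ['step_num', 'start_point', 'end_point', 'total', 'fp_point', 'iteration_interval', 'fp']
print(row)     # [1, 100, 200, 100, 110, 10, 90]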