!10454 Fix the unsafe code of Profiler

From: @gzhcv
Reviewed-by: @wangyue01,@lilongfei15
Signed-off-by: @lilongfei15
pull/10454/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit e30734d1b8

@ -22,7 +22,7 @@ from collections import namedtuple
from decimal import Decimal
from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException, \
JobIdMismatchException, ProfilerIOException
JobIdMismatchException, ProfilerIOException, ProfilerRawFileException
from mindspore import log
from mindspore.profiler.common.util import get_summary_for_step_trace
from mindspore.profiler.common.validator.validate_path import \
@ -401,13 +401,25 @@ class GpuStepTraceParser(BaseStepTraceParser):
fp_start, bp_end, iter_end, iter_start = 0, 1, 2, 3
reduce_start = 4
start_time, end_time = 0, 1
STEP_TRACE_POINT_COUNT = 3
source_file = validate_and_normalize_path(source_file)
try:
with open(source_file, 'r') as f:
lines = f.readlines()
if len(lines) < STEP_TRACE_POINT_COUNT:
raise ProfilerRawFileException(
f"Failed to parse {source_file} file. The FP_POINT/BP_POINT/ITER_END_POINT "
f"do not recognized correctly. Try to set the environment variable'PROFILING_FP_START' "
f"and 'PROFILING_BP_END' to solve this problem. For example, "
f"'export PROFILING_FP_START=Defualt/xxx/Conv2d-op1' ")
step_trace_info_all = [line.strip().split()[1:] for line in lines]
num_of_step = len(step_trace_info_all[0])
for step_trace_point in step_trace_info_all:
if len(step_trace_point) != num_of_step:
raise ProfilerRawFileException(
f"Failed to parse {source_file} file. Due to the profiled "
f"step_num of FP/BP/ITER_END Point are not equal")
iter_start_info = [step_trace_info_all[fp_start][0]] + \
step_trace_info_all[iter_end][:num_of_step]
step_trace_info_all.insert(iter_start, iter_start_info)

@ -130,7 +130,7 @@ class Profiler:
profiling_options = json.dumps(profiling_options)
# Characters beyond the 2048-character limit are ignored, which causes profiling option parsing errors
if len(profiling_options) > 2048:
raise ValueError("The parameter length exceeds the limit (2048)")
raise ValueError("The parameter length exceeds the limit (2048), please input valid parameters.")
# Use the context interface to enable profiling; this applies to newer MindSpore versions (after 2020.5.21)
context.set_context(enable_profiling=True, profiling_options=profiling_options)
@ -369,36 +369,30 @@ class Profiler:
return self._profiling_job_id
job_id = ""
cmd = "ls -t " + self._output_path + "|grep JOB|awk '{print $1}'"
r = os.popen(cmd)
profiling_job_dirs = r.readlines()
r.close()
for item in profiling_job_dirs:
path = os.path.join(self._output_path, item.strip())
log_file = get_file_names(path, "host_start.log")
if not log_file:
logger.error("Profiling: job path %s, host_start.log not exist.", path)
continue
log_file = os.path.join(path, log_file[0])
item_dict = self._parse_host_start_log(log_file)
if not item_dict:
logger.error("Profiling: job path %s, fail to get job start info.", path)
continue
if self._dev_id != item_dict["device_id"]:
logger.info("Profiling: job path %s, dev id %s, training device id %s.",
path, item_dict["device_id"], self._dev_id)
continue
if self._start_time > int(item_dict["start_time"]):
logger.info("Profiling: job path %s, start_time %s, training start_time %d.",
path, item_dict["start_time"], self._start_time)
break
for item in os.listdir(self._output_path):
if item.startswith('JOB'):
path = os.path.join(self._output_path, item)
job_id = item
log_file = get_file_names(path, "host_start.log")
if not log_file:
logger.error("Profiling: job path %s, host_start.log not exist.", path)
log_file = os.path.join(path, log_file[0])
item_dict = self._parse_host_start_log(log_file)
if not item_dict:
logger.error("Profiling: job path %s, fail to get job start info.", path)
job_id = item.strip()
break
if self._dev_id != item_dict["device_id"]:
logger.info("Profiling: job path %s, dev id %s, training device id %s.",
path, item_dict["device_id"], self._dev_id)
if self._start_time > int(item_dict["start_time"]):
logger.info("Profiling: job path %s, start_time %s, training start_time %d.",
path, item_dict["start_time"], self._start_time)
break
if not job_id:
msg = "Fail to get profiling job, please check whether job dir was generated"

Loading…
Cancel
Save