|
|
|
@@ -16,7 +16,10 @@ import core
|
|
|
|
|
from contextlib import contextmanager
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
__all__ = ['cuda_profiler', 'reset_profiler', 'profiler']
|
|
|
|
|
__all__ = [
|
|
|
|
|
'cuda_profiler', 'reset_profiler', 'profiler', 'start_profiler',
|
|
|
|
|
'stop_profiler'
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
NVPROF_CONFIG = [
|
|
|
|
|
"gpustarttimestamp",
|
|
|
|
@@ -72,20 +75,31 @@ def reset_profiler():
|
|
|
|
|
core.reset_profiler()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@contextmanager
|
|
|
|
|
def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
|
|
|
|
|
"""The profiler interface.
|
|
|
|
|
Different from cuda_profiler, this profiler can be used to profile both CPU
|
|
|
|
|
and GPU program. By defalut, it records the CPU and GPU operator kernels,
|
|
|
|
|
if you want to profile other program, you can refer the profiling tutorial
|
|
|
|
|
to add more records.
|
|
|
|
|
def start_profiler(state):
    """Enable the profiler.

    Args:
        state (string) : The profiling state, which should be 'CPU', 'GPU'
            or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling
            GPU as well. 'All' also generates timeline.
    """
    # Enabling twice is a no-op: keep the currently running session intact.
    if core.is_profiler_enabled():
        return
    if state not in ('CPU', 'GPU', 'All'):
        raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
    # Map the user-facing state name onto the C++ ProfilerState enum.
    state_to_prof = {
        'CPU': core.ProfilerState.kCPU,
        'GPU': core.ProfilerState.kCUDA,
        'All': core.ProfilerState.kAll,
    }
    core.enable_profiler(state_to_prof[state])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
|
|
|
|
|
"""Stop the profiler.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
state (string) : The profiling state, which should be 'CPU' or 'GPU',
|
|
|
|
|
telling the profiler to use CPU timer or GPU timer for profiling.
|
|
|
|
|
Although users may have already specified the execution place
|
|
|
|
|
(CPUPlace/CUDAPlace) in the begining, for flexibility the profiler
|
|
|
|
|
would not inherit this place.
|
|
|
|
|
sorted_key (string) : If None, the profiling results will be printed
|
|
|
|
|
in the order of first end time of events. Otherwise, the profiling
|
|
|
|
|
results will be sorted by the this flag. This flag should be one
|
|
|
|
@@ -98,17 +112,8 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
|
|
|
|
|
profile_path (string) : If state == 'All', it will write a profile
|
|
|
|
|
proto output file.
|
|
|
|
|
"""
|
|
|
|
|
if state not in ['CPU', 'GPU', "All"]:
|
|
|
|
|
raise ValueError("The state must be 'CPU' or 'GPU' or 'All'.")
|
|
|
|
|
if state == "GPU":
|
|
|
|
|
prof_state = core.ProfilerState.kCUDA
|
|
|
|
|
elif state == "CPU":
|
|
|
|
|
prof_state = core.ProfilerState.kCPU
|
|
|
|
|
else:
|
|
|
|
|
prof_state = core.ProfilerState.kAll
|
|
|
|
|
core.enable_profiler(prof_state)
|
|
|
|
|
yield
|
|
|
|
|
|
|
|
|
|
if not core.is_profiler_enabled():
|
|
|
|
|
return
|
|
|
|
|
sorted_key = 'default' if sorted_key is None else sorted_key
|
|
|
|
|
if sorted_key not in ['default', 'calls', 'total', 'max', 'min', 'ave']:
|
|
|
|
|
raise ValueError("The sorted_key must be None or in 'calls', 'total', "
|
|
|
|
@@ -124,3 +129,34 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
|
|
|
|
|
# TODO(qingqing) : redirect C++ ostream to Python stream.
|
|
|
|
|
# with core.ostream_redirect(stdout=True, stderr=True):
|
|
|
|
|
core.disable_profiler(key_map[sorted_key], profile_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@contextmanager
def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
    """The profiler interface.

    Different from cuda_profiler, this profiler can be used to profile both
    CPU and GPU program. By default, it records the CPU and GPU operator
    kernels; if you want to profile other program, you can refer to the
    profiling tutorial to add more records.

    Args:
        state (string) : The profiling state, which should be 'CPU', 'GPU'
            or 'All', telling the profiler to use CPU timer or GPU timer
            for profiling. Although users may have already specified the
            execution place (CPUPlace/CUDAPlace) in the beginning, for
            flexibility the profiler would not inherit this place.
        sorted_key (string) : If None, the profiling results will be printed
            in the order of first end time of events. Otherwise, the
            profiling results will be sorted by this flag. This flag should
            be one of 'calls', 'total', 'max', 'min' or 'ave'.
            The `calls` means sorting by the number of calls.
            The `total` means sorting by the total execution time.
            The `max` means sorting by the maximum execution time.
            The `min` means sorting by the minimum execution time.
            The `ave` means sorting by the average execution time.
        profile_path (string) : If state == 'All', it will write a profile
            proto output file.
    """
    start_profiler(state)
    try:
        yield
    finally:
        # Always disable the profiler, even if the profiled body raises;
        # otherwise the C++ profiler would stay enabled for the rest of
        # the process.
        stop_profiler(sorted_key, profile_path)
|
|
|
|
|