@ -276,7 +276,7 @@ struct EventItem {
// Print results
void PrintProfiler ( const std : : vector < std : : vector < EventItem > > & events_table ,
const std : : string & sorted_domain , const size_t name_width ,
const size_t data_width , double total ) {
const size_t data_width , bool merge_thread ) {
// Output header information
std : : cout < < " \n -------------------------> "
< < " Profiling Report "
@ -292,6 +292,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
PADDLE_THROW ( " Invalid profiler state " , g_state ) ;
}
if ( merge_thread ) {
std : : cout < < " Note! This Report merge all thread info into one. "
< < std : : endl ;
}
std : : cout < < " Place: " < < place < < std : : endl ;
std : : cout < < " Time unit: ms " < < std : : endl ;
std : : cout < < " Sorted by " < < sorted_domain
@ -312,8 +316,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
< < std : : setw ( data_width ) < < event_item . min_time
< < std : : setw ( data_width ) < < event_item . max_time
< < std : : setw ( data_width ) < < event_item . ave_time
< < std : : setw ( data_width ) < < event_item . total_time / total
< < std : : endl ;
< < std : : setw ( data_width ) < < event_item . ratio < < std : : endl ;
}
}
std : : cout < < std : : endl ;
@ -321,8 +324,10 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
// Parse the event list and output the profiling report
void ParseEvents ( const std : : vector < std : : vector < Event > > & events ,
bool merge_thread ,
EventSortingKey sorted_by = EventSortingKey : : kDefault ) {
if ( g_state = = ProfilerState : : kDisabled ) return ;
if ( merge_thread & & events . size ( ) < 2 ) return ;
std : : string sorted_domain ;
std : : function < bool ( const EventItem & , const EventItem & ) > sorted_func ;
@ -361,34 +366,55 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
sorted_domain = " event first end time " ;
}
const std : : vector < std : : vector < Event > > * analyze_events ;
std : : vector < std : : vector < Event > > merged_events_list ;
if ( merge_thread ) {
std : : vector < Event > merged_events ;
for ( int i = 0 ; i < events . size ( ) ; + + i ) {
for ( int j = 0 ; j < events [ i ] . size ( ) ; + + j ) {
merged_events . push_back ( events [ i ] [ j ] ) ;
}
}
merged_events_list . push_back ( merged_events ) ;
analyze_events = & merged_events_list ;
} else {
analyze_events = & events ;
}
std : : vector < std : : vector < EventItem > > events_table ;
size_t max_name_width = 0 ;
double total = 0. ; // the total time
for ( size_t i = 0 ; i < events . size ( ) ; i + + ) {
for ( size_t i = 0 ; i < ( * analyze_events ) . size ( ) ; i + + ) {
double total = 0. ; // the total time in one thread
std : : list < Event > pushed_events ;
std : : vector < EventItem > event_items ;
std : : unordered_map < std : : string , int > event_idx ;
for ( size_t j = 0 ; j < events [ i ] . size ( ) ; j + + ) {
if ( events [ i ] [ j ] . type ( ) = = EventType : : kPushRange ) {
pushed_events . push_back ( events [ i ] [ j ] ) ;
} else if ( events [ i ] [ j ] . type ( ) = = EventType : : kPopRange ) {
for ( size_t j = 0 ; j < ( * analyze_ events) [ i ] . size ( ) ; j + + ) {
if ( ( * analyze_ events) [ i ] [ j ] . type ( ) = = EventType : : kPushRange ) {
pushed_events . push_back ( ( * analyze_ events) [ i ] [ j ] ) ;
} else if ( ( * analyze_ events) [ i ] [ j ] . type ( ) = = EventType : : kPopRange ) {
std : : list < Event > : : reverse_iterator rit = pushed_events . rbegin ( ) ;
while ( rit ! = pushed_events . rend ( ) & &
rit - > name ( ) ! = events [ i ] [ j ] . name ( ) ) {
rit - > name ( ) ! = ( * analyze_ events) [ i ] [ j ] . name ( ) ) {
+ + rit ;
}
if ( rit ! = pushed_events . rend ( ) ) {
double event_time = ( g_state = = ProfilerState : : kCUDA | |
g_state = = ProfilerState : : kAll )
? rit - > CudaElapsedMs ( events[ i ] [ j ] )
: rit - > CpuElapsedMs ( events[ i ] [ j ] ) ;
? rit - > CudaElapsedMs ( ( * analyze_ events) [ i ] [ j ] )
: rit - > CpuElapsedMs ( ( * analyze_ events) [ i ] [ j ] ) ;
total + = event_time ;
std : : string event_name =
" thread " + std : : to_string ( rit - > thread_id ( ) ) + " :: " + rit - > name ( ) ;
std : : string event_name ;
if ( merge_thread ) {
event_name = rit - > name ( ) ;
max_name_width = std : : max ( max_name_width , event_name . size ( ) ) ;
} else {
event_name = " thread " + std : : to_string ( rit - > thread_id ( ) ) + " :: " +
rit - > name ( ) ;
max_name_width = std : : max ( max_name_width , event_name . size ( ) ) ;
}
if ( event_idx . find ( event_name ) = = event_idx . end ( ) ) {
event_idx [ event_name ] = event_items . size ( ) ;
@ -413,7 +439,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
pushed_events . erase ( ( + + rit ) . base ( ) ) ;
} else {
LOG ( WARNING ) < < " Cannot find the push marker of event \' "
< < events[ i ] [ j ] . name ( )
< < ( * analyze_ events) [ i ] [ j ] . name ( )
< < " \' , which will be ignored in profiling report. " ;
}
}
@ -421,6 +447,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
// average time
for ( auto & item : event_items ) {
item . ave_time = item . total_time / item . calls ;
item . ratio = item . total_time / total ;
}
// sort
if ( sorted_by ! = EventSortingKey : : kDefault ) {
@ -438,7 +465,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
}
// Print report
PrintProfiler ( events_table , sorted_domain , max_name_width + 4 , 12 , total ) ;
PrintProfiler ( events_table , sorted_domain , max_name_width + 4 , 12 ,
merge_thread ) ;
}
void DisableProfiler ( EventSortingKey sorted_key ,
@ -449,7 +477,8 @@ void DisableProfiler(EventSortingKey sorted_key,
Mark ( " _stop_profiler_ " , nullptr ) ;
std : : vector < std : : vector < Event > > all_events = GetAllEvents ( ) ;
ParseEvents ( all_events , sorted_key ) ;
ParseEvents ( all_events , true , sorted_key ) ;
ParseEvents ( all_events , false , sorted_key ) ;
ResetProfiler ( ) ;
DeviceTracer * tracer = GetDeviceTracer ( ) ;
if ( tracer - > IsEnabled ( ) ) {