|
|
|
@ -15,7 +15,7 @@ limitations under the License. */
|
|
|
|
|
#pragma once
|
|
|
|
|
#include <forward_list>
|
|
|
|
|
#include <list>
|
|
|
|
|
#include <mutex>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <vector>
|
|
|
|
|
#include "paddle/fluid/platform/device_context.h"
|
|
|
|
|
#include "paddle/fluid/platform/profiler.pb.h"
|
|
|
|
@ -23,16 +23,16 @@ limitations under the License. */
|
|
|
|
|
namespace paddle {
|
|
|
|
|
namespace platform {
|
|
|
|
|
|
|
|
|
|
// Kind tag carried by an Event. The enumerators keep their implicit values
// (kMark = 0, kPushRange = 1, kPopRange = 2).
// NOTE(review): the names suggest a single marker versus the opening and
// closing of a range - confirm against the code that records events.
enum EventKind {
  kMark,
  kPushRange,
  kPopRange,
};
|
|
|
|
|
// Type tag carried by an Event. The enumerators keep their implicit values
// (kMark = 0, kPushRange = 1, kPopRange = 2).
// NOTE(review): the names suggest a single marker versus the opening and
// closing of a range - confirm against the code that records events.
enum EventType {
  kMark,
  kPushRange,
  kPopRange,
};
|
|
|
|
|
|
|
|
|
|
class Event {
|
|
|
|
|
public:
|
|
|
|
|
// The DeviceContext is used to get the cuda stream.
|
|
|
|
|
// If CPU profiling mode, can pass nullptr.
|
|
|
|
|
Event(EventKind kind, std::string name, uint32_t thread_id,
|
|
|
|
|
Event(EventType type, std::string name, uint32_t thread_id,
|
|
|
|
|
const DeviceContext* dev_ctx);
|
|
|
|
|
|
|
|
|
|
std::string kind() const;
|
|
|
|
|
const EventType& type() const;
|
|
|
|
|
std::string name() const { return name_; }
|
|
|
|
|
uint32_t thread_id() const { return thread_id_; }
|
|
|
|
|
bool has_cuda() const { return has_cuda_; }
|
|
|
|
@ -46,7 +46,7 @@ class Event {
|
|
|
|
|
double CudaElapsedMs(const Event& e) const;
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
EventKind kind_;
|
|
|
|
|
EventType type_;
|
|
|
|
|
std::string name_;
|
|
|
|
|
uint32_t thread_id_;
|
|
|
|
|
int64_t cpu_ns_;
|
|
|
|
@ -57,39 +57,6 @@ class Event {
|
|
|
|
|
#endif
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct EventList {
|
|
|
|
|
constexpr static size_t kMB = 1024 * 1024;
|
|
|
|
|
constexpr static size_t kEventBlockSize = 16 * kMB;
|
|
|
|
|
constexpr static size_t kEventSize = sizeof(Event);
|
|
|
|
|
constexpr static size_t kEventAlign = alignof(Event);
|
|
|
|
|
constexpr static size_t kNumBlock =
|
|
|
|
|
kEventBlockSize /
|
|
|
|
|
((kEventSize + kEventAlign - 1) / kEventAlign * kEventAlign);
|
|
|
|
|
|
|
|
|
|
template <typename... Args>
|
|
|
|
|
void Record(Args&&... args) {
|
|
|
|
|
if (event_blocks.empty() || event_blocks.front().size() == kNumBlock) {
|
|
|
|
|
event_blocks.emplace_front();
|
|
|
|
|
event_blocks.front().reserve(kNumBlock);
|
|
|
|
|
}
|
|
|
|
|
event_blocks.front().emplace_back(std::forward<Args>(args)...);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::vector<Event> Reduce() {
|
|
|
|
|
std::vector<Event> result;
|
|
|
|
|
for (auto& block : event_blocks) {
|
|
|
|
|
result.insert(result.begin(), std::make_move_iterator(block.begin()),
|
|
|
|
|
std::make_move_iterator(block.end()));
|
|
|
|
|
}
|
|
|
|
|
event_blocks.clear();
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Clear() { event_blocks.clear(); }
|
|
|
|
|
|
|
|
|
|
std::forward_list<std::vector<Event>> event_blocks;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
enum ProfilerState {
|
|
|
|
|
kDisabled, // disabled state
|
|
|
|
|
kCPU, // CPU profiling state
|
|
|
|
@ -136,16 +103,6 @@ struct RecordThread {
|
|
|
|
|
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
|
|
|
|
|
std::vector<std::vector<Event>> GetAllEvents();
|
|
|
|
|
|
|
|
|
|
// One entry of the profiling report: the information reported for an event.
// This is a plain aggregate; the field order below is part of its interface
// because callers may brace-initialize it positionally.
// NOTE(review): the per-field notes are read off the field names and the time
// unit is not visible here - confirm against the code that fills these in.
struct EventItem {
  std::string name;   // event name
  int calls;          // presumably how many times the event occurred
  double total_time;  // presumably the time summed over all calls
  double min_time;    // presumably the shortest single call
  double max_time;    // presumably the longest single call
  double ave_time;    // presumably the average time per call
};
|
|
|
|
|
|
|
|
|
|
// Keys by which the profiling report can be sorted. The enumerators keep
// their implicit values 0..5 in the order listed.
enum EventSortingKey {
  kDefault,
  kCalls,
  kTotal,
  kMin,
  kMax,
  kAve,
};
|
|
|
|
|
|
|
|
|
@ -158,14 +115,5 @@ void ResetProfiler();
|
|
|
|
|
void DisableProfiler(EventSortingKey sorted_key,
|
|
|
|
|
const std::string& profile_path);
|
|
|
|
|
|
|
|
|
|
// Parse the event list and output the profiling report
|
|
|
|
|
void ParseEvents(std::vector<std::vector<Event>>&,
|
|
|
|
|
EventSortingKey sorted_by = EventSortingKey::kDefault);
|
|
|
|
|
|
|
|
|
|
// Print results
|
|
|
|
|
void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
|
|
|
|
|
std::string& sorted_domain, const size_t name_width,
|
|
|
|
|
const size_t data_width);
|
|
|
|
|
|
|
|
|
|
} // namespace platform
|
|
|
|
|
} // namespace paddle
|
|
|
|
|