Code cleanup in the profiler code. (#9782)

fea/docker_cudnn7
qingqing01 7 years ago committed by Yi Wang
parent 326b434ab5
commit add367c3f4

File diff suppressed because it is too large Load Diff

@ -15,7 +15,7 @@ limitations under the License. */
#pragma once
#include <forward_list>
#include <list>
#include <mutex>
#include <string>
#include <vector>
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/profiler.pb.h"
@ -23,16 +23,16 @@ limitations under the License. */
namespace paddle {
namespace platform {
enum EventKind { kMark, kPushRange, kPopRange };
enum EventType { kMark, kPushRange, kPopRange };
class Event {
public:
// The DeviceContext is used to get the cuda stream.
// If CPU profiling mode, can pass nullptr.
Event(EventKind kind, std::string name, uint32_t thread_id,
Event(EventType type, std::string name, uint32_t thread_id,
const DeviceContext* dev_ctx);
std::string kind() const;
const EventType& type() const;
std::string name() const { return name_; }
uint32_t thread_id() const { return thread_id_; }
bool has_cuda() const { return has_cuda_; }
@ -46,7 +46,7 @@ class Event {
double CudaElapsedMs(const Event& e) const;
private:
EventKind kind_;
EventType type_;
std::string name_;
uint32_t thread_id_;
int64_t cpu_ns_;
@ -57,39 +57,6 @@ class Event {
#endif
};
struct EventList {
constexpr static size_t kMB = 1024 * 1024;
constexpr static size_t kEventBlockSize = 16 * kMB;
constexpr static size_t kEventSize = sizeof(Event);
constexpr static size_t kEventAlign = alignof(Event);
constexpr static size_t kNumBlock =
kEventBlockSize /
((kEventSize + kEventAlign - 1) / kEventAlign * kEventAlign);
template <typename... Args>
void Record(Args&&... args) {
if (event_blocks.empty() || event_blocks.front().size() == kNumBlock) {
event_blocks.emplace_front();
event_blocks.front().reserve(kNumBlock);
}
event_blocks.front().emplace_back(std::forward<Args>(args)...);
}
std::vector<Event> Reduce() {
std::vector<Event> result;
for (auto& block : event_blocks) {
result.insert(result.begin(), std::make_move_iterator(block.begin()),
std::make_move_iterator(block.end()));
}
event_blocks.clear();
return result;
}
void Clear() { event_blocks.clear(); }
std::forward_list<std::vector<Event>> event_blocks;
};
enum ProfilerState {
kDisabled, // disabled state
kCPU, // CPU profiling state
@ -136,16 +103,6 @@ struct RecordThread {
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
std::vector<std::vector<Event>> GetAllEvents();
// The information of each event given in the profiling report
struct EventItem {
std::string name;
int calls;
double total_time;
double min_time;
double max_time;
double ave_time;
};
// Candidate keys to sort the profiling report
enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
@ -158,14 +115,5 @@ void ResetProfiler();
void DisableProfiler(EventSortingKey sorted_key,
const std::string& profile_path);
// Parse the event list and output the profiling report
void ParseEvents(std::vector<std::vector<Event>>&,
EventSortingKey sorted_by = EventSortingKey::kDefault);
// Print results
void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
std::string& sorted_domain, const size_t name_width,
const size_t data_width);
} // namespace platform
} // namespace paddle

@ -13,22 +13,23 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/profiler.h"
#include <string>
#ifdef PADDLE_WITH_CUDA
#include "cuda_runtime.h"
#include <cuda_runtime.h>
#endif
#include "gtest/gtest.h"
TEST(Event, CpuElapsedTime) {
using paddle::platform::Event;
using paddle::platform::EventKind;
using paddle::platform::EventType;
Event start_event(EventKind::kPushRange, "test", 0, nullptr);
Event start_event(EventType::kPushRange, "test", 0, nullptr);
EXPECT_TRUE(start_event.has_cuda() == false);
int counter = 0;
while (counter != 1000) {
counter++;
}
Event stop_event(EventKind::kPopRange, "test", 0, nullptr);
Event stop_event(EventType::kPopRange, "test", 0, nullptr);
EXPECT_GT(start_event.CpuElapsedMs(stop_event), 0);
}
@ -38,16 +39,16 @@ TEST(Event, CudaElapsedTime) {
using paddle::platform::CUDADeviceContext;
using paddle::platform::CUDAPlace;
using paddle::platform::Event;
using paddle::platform::EventKind;
using paddle::platform::EventType;
DeviceContext* dev_ctx = new CUDADeviceContext(CUDAPlace(0));
Event start_event(EventKind::kPushRange, "test", 0, dev_ctx);
Event start_event(EventType::kPushRange, "test", 0, dev_ctx);
EXPECT_TRUE(start_event.has_cuda() == true);
int counter = 0;
while (counter != 1000) {
counter++;
}
Event stop_event(EventKind::kPopRange, "test", 0, dev_ctx);
Event stop_event(EventType::kPopRange, "test", 0, dev_ctx);
EXPECT_GT(start_event.CudaElapsedMs(stop_event), 0);
}
#endif
@ -55,7 +56,7 @@ TEST(Event, CudaElapsedTime) {
TEST(RecordEvent, RecordEvent) {
using paddle::platform::DeviceContext;
using paddle::platform::Event;
using paddle::platform::EventKind;
using paddle::platform::EventType;
using paddle::platform::RecordEvent;
using paddle::platform::ProfilerState;
using paddle::platform::EventSortingKey;

Loading…
Cancel
Save