From 8e904d322f7742bbc1716455706d7e0847c3c256 Mon Sep 17 00:00:00 2001 From: chengduo Date: Mon, 25 Feb 2019 02:13:40 -0600 Subject: [PATCH] Remove unnecessary dependence for profiler (#15899) * refile profiler test=develop * follow comment test=develop --- paddle/fluid/platform/CMakeLists.txt | 6 +-- paddle/fluid/platform/device_tracer.h | 3 +- paddle/fluid/platform/event.h | 65 ++++++++++++++++++++++++++ paddle/fluid/platform/profiler.cu | 20 ++++---- paddle/fluid/platform/profiler.h | 51 ++------------------ paddle/fluid/platform/profiler_test.cc | 1 - 6 files changed, 84 insertions(+), 62 deletions(-) create mode 100644 paddle/fluid/platform/event.h diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index b7e84031e7..1838506c89 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -87,11 +87,11 @@ nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) cc_library(timer SRCS timer.cc) cc_test(timer_test SRCS timer_test.cc DEPS timer) -cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto device_context ${GPU_CTX_DEPS}) +cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) if(WITH_GPU) - nv_library(profiler SRCS profiler.cc profiler.cu DEPS device_context device_tracer) + nv_library(profiler SRCS profiler.cc profiler.cu DEPS device_tracer gpu_info enforce) else() - cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) + cc_library(profiler SRCS profiler.cc DEPS device_tracer enforce) endif() cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) diff --git a/paddle/fluid/platform/device_tracer.h b/paddle/fluid/platform/device_tracer.h index 6ee2c36146..d4418d836d 100644 --- a/paddle/fluid/platform/device_tracer.h +++ b/paddle/fluid/platform/device_tracer.h @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include "paddle/fluid/platform/dynload/cupti.h" +#include "paddle/fluid/platform/event.h" #include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/profiler.pb.h" @@ -32,8 +33,6 @@ inline uint64_t PosixInNsec() { return 1000 * (static_cast(tv.tv_sec) * 1000000 + tv.tv_usec); } -class Event; - // DeviceTracer performs the following tasks: // 1. Register cuda callbacks for various events: kernel, memcpy, etc. // 2. Collect cuda statistics: start/end ts, memory, etc. diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h new file mode 100644 index 0000000000..a4db23758b --- /dev/null +++ b/paddle/fluid/platform/event.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include + +namespace paddle { +namespace platform { + +enum EventType { kMark, kPushRange, kPopRange }; + +class Event { + public: + // Creates an event with the given type, name and thread id. + // No DeviceContext is passed in; the constructor takes no device argument. 
+ Event(EventType type, std::string name, uint32_t thread_id); + + const EventType& type() const; + std::string name() const { return name_; } + uint32_t thread_id() const { return thread_id_; } + +#ifdef PADDLE_WITH_CUDA +#ifndef PADDLE_WITH_CUPTI + cudaEvent_t event() const { return event_; } + int device() const { return device_; } +#endif +#endif + + double CpuElapsedMs(const Event& e) const; + double CudaElapsedMs(const Event& e) const; + + private: + EventType type_; + std::string name_; + uint32_t thread_id_; + int64_t cpu_ns_; +#ifdef PADDLE_WITH_CUDA +#ifdef PADDLE_WITH_CUPTI + int64_t gpu_ns_ = 0; + + public: + void AddCudaElapsedTime(int64_t start_ns, int64_t end_ns) { + gpu_ns_ += end_ns - start_ns; + } + + private: +#else + cudaEvent_t event_ = nullptr; + int device_ = -1; +#endif +#endif +}; +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/profiler.cu b/paddle/fluid/platform/profiler.cu index e115c554ca..aed276b16e 100644 --- a/paddle/fluid/platform/profiler.cu +++ b/paddle/fluid/platform/profiler.cu @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/platform/profiler.h" - #include +#include "paddle/fluid/platform/profiler.h" namespace paddle { namespace platform { @@ -22,26 +21,29 @@ namespace platform { __global__ void DummyKernel(int *a) { a[0] = 0; } static void ForEachDevice(std::function func) { - auto original_device = GetCurrentDeviceId(); - int count = GetCUDADeviceCount(); + auto original_device = platform::GetCurrentDeviceId(); + int count = platform::GetCUDADeviceCount(); for (int i = 0; i < count; i++) { - SetDeviceId(i); + platform::SetDeviceId(i); func(i); } - SetDeviceId(original_device); + platform::SetDeviceId(original_device); } void DummyKernelAndEvent() { for (int i = 0; i < 5; i++) { ForEachDevice([](int d) { - CUDADeviceContext *dev_ctx = new CUDADeviceContext(CUDAPlace(d)); + platform::SetDeviceId(d); + cudaStream_t stream; + PADDLE_ENFORCE(cudaStreamCreate(&stream)); Mark("_cuda_startup_"); int *ptr; PADDLE_ENFORCE(cudaMalloc(&ptr, sizeof(int))); - DummyKernel<<<1, 1, 0, dev_ctx->stream()>>>(ptr); - dev_ctx->Wait(); + DummyKernel<<<1, 1, 0, stream>>>(ptr); + PADDLE_ENFORCE(cudaStreamSynchronize(stream)); PADDLE_ENFORCE(cudaFree(ptr)); - delete dev_ctx; + // Release the stream created above; nothing else holds a handle to it. + PADDLE_ENFORCE(cudaStreamDestroy(stream)); }); } } diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h index 4057e5ea05..aec0ae3429 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -17,54 +17,13 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/device_context.h" - -namespace paddle { -namespace platform { - -enum EventType { kMark, kPushRange, kPopRange }; - -class Event { - public: - // The DeviceContext is used to get the cuda stream. - // If CPU profiling mode, can pass nullptr. 
- Event(EventType type, std::string name, uint32_t thread_id); - - const EventType& type() const; - std::string name() const { return name_; } - uint32_t thread_id() const { return thread_id_; } - +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/event.h" #ifdef PADDLE_WITH_CUDA -#ifndef PADDLE_WITH_CUPTI - cudaEvent_t event() const { return event_; } - int device() const { return device_; } -#endif +#include "paddle/fluid/platform/gpu_info.h" #endif - - double CpuElapsedMs(const Event& e) const; - double CudaElapsedMs(const Event& e) const; - - private: - EventType type_; - std::string name_; - uint32_t thread_id_; - int64_t cpu_ns_; -#ifdef PADDLE_WITH_CUDA -#ifdef PADDLE_WITH_CUPTI - int64_t gpu_ns_ = 0; - - public: - void AddCudaElapsedTime(int64_t start_ns, int64_t end_ns) { - gpu_ns_ += end_ns - start_ns; - } - - private: -#else - cudaEvent_t event_ = nullptr; - int device_ = -1; -#endif -#endif -}; +namespace paddle { +namespace platform { enum ProfilerState { kDisabled, // disabled state diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc index 528fe03c67..a851488e72 100644 --- a/paddle/fluid/platform/profiler_test.cc +++ b/paddle/fluid/platform/profiler_test.cc @@ -33,7 +33,6 @@ TEST(Event, CpuElapsedTime) { } TEST(RecordEvent, RecordEvent) { - using paddle::platform::DeviceContext; using paddle::platform::Event; using paddle::platform::EventType; using paddle::platform::RecordEvent;