Merge pull request #8542 from panyx0718/test
	
		
	
				
					
				
			Extend current profiler for timeline and more features.tonyyang-svail-patch-1
						commit
						decaad5ccc
					
				| @ -0,0 +1,41 @@ | ||||
| # Nothing below runs unless WITH_GPU is set. | ||||
| if(NOT WITH_GPU) | ||||
|     return() | ||||
| endif() | ||||
| 
 | ||||
| 
 | ||||
| # Locate cupti.h. Only the listed locations are searched (NO_DEFAULT_PATH): | ||||
| # the CUPTI_ROOT cache entry, the CUPTI_ROOT environment variable, and the | ||||
| # extras/CUPTI/include directory under CUDA_TOOLKIT_ROOT_DIR. | ||||
| set(CUPTI_ROOT "/usr" CACHE PATH "CUPTI ROOT") | ||||
| find_path(CUPTI_INCLUDE_DIR cupti.h | ||||
|         PATHS ${CUPTI_ROOT} ${CUPTI_ROOT}/include | ||||
|         $ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include | ||||
|         ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include | ||||
|         NO_DEFAULT_PATH | ||||
|         ) | ||||
| 
 | ||||
| # Directory part of CUDA_CUDART_LIBRARY; passed to find_library() as a search path. | ||||
| get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) | ||||
| 
 | ||||
| # Architecture used to build the multiarch library directory name | ||||
| # (lib/<arch>-linux-gnu). Defaults to x86_64 and is overridden by | ||||
| # CMAKE_SYSTEM_PROCESSOR whenever CMake reports a non-empty value. | ||||
| # Test the variable by name: expanding it inside if() would make CMake | ||||
| # evaluate the processor string itself as a variable name, and an empty | ||||
| # value would leave a malformed if(NOT). | ||||
| set(TARGET_ARCH "x86_64") | ||||
| if(CMAKE_SYSTEM_PROCESSOR) | ||||
|     set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) | ||||
| endif() | ||||
| 
 | ||||
| # Candidate directories for the CUPTI shared library: the CUPTI_ROOT cache | ||||
| # entry and environment variable (with lib/lib64 variants), /usr/lib, and | ||||
| # the extras/CUPTI/lib64 directory under CUDA_TOOLKIT_ROOT_DIR. | ||||
| list(APPEND CUPTI_CHECK_LIBRARY_DIRS | ||||
|         ${CUPTI_ROOT} | ||||
|         ${CUPTI_ROOT}/lib64 | ||||
|         ${CUPTI_ROOT}/lib | ||||
|         ${CUPTI_ROOT}/lib/${TARGET_ARCH}-linux-gnu | ||||
|         $ENV{CUPTI_ROOT} | ||||
|         $ENV{CUPTI_ROOT}/lib64 | ||||
|         $ENV{CUPTI_ROOT}/lib | ||||
|         /usr/lib | ||||
|         ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/lib64) | ||||
| # Search only the directories above plus the header directory and the CUDA | ||||
| # runtime library directory; system default paths are not consulted. | ||||
| find_library(CUPTI_LIBRARY NAMES libcupti.so libcupti.dylib # libcupti_static.a | ||||
|        PATHS ${CUPTI_CHECK_LIBRARY_DIRS} ${CUPTI_INCLUDE_DIR} ${__libpath_hist} | ||||
|        NO_DEFAULT_PATH | ||||
|        DOC "Path to cuPTI library.") | ||||
| 
 | ||||
| # Record the directory that holds the CUPTI library. | ||||
| get_filename_component(CUPTI_LIBRARY_PATH ${CUPTI_LIBRARY} DIRECTORY) | ||||
| # CUPTI counts as found only when both the header and the library exist. | ||||
| set(CUPTI_FOUND OFF) | ||||
| if(CUPTI_INCLUDE_DIR AND CUPTI_LIBRARY) | ||||
|     set(CUPTI_FOUND ON) | ||||
| endif() | ||||
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								| @ -0,0 +1,72 @@ | ||||
| /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| 
 | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
| 
 | ||||
|     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| 
 | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. */ | ||||
| 
 | ||||
| #pragma once | ||||
| #include <cstdint> | ||||
| #include <string> | ||||
| #include "paddle/fluid/platform/dynload/cupti.h" | ||||
| #include "paddle/fluid/platform/profiler.pb.h" | ||||
| 
 | ||||
| namespace paddle { | ||||
| namespace platform { | ||||
| 
 | ||||
| ///////////////////////
 | ||||
| // WARN: Under Development. Don't depend on it yet.
 | ||||
| //////////////////////
 | ||||
| 
 | ||||
| // DeviceTracer performs the following tasks:
 | ||||
| // 1. Register cuda callbacks for various events: kernel, memcpy, etc.
 | ||||
| // 2. Collect cuda statistics: start/end ts, memory, etc.
 | ||||
| // 3. Generate a protobuf for further analysis.
 | ||||
| class DeviceTracer { | ||||
|  public: | ||||
|   struct KernelRecord { | ||||
|     uint64_t start_ns; | ||||
|     uint64_t end_ns; | ||||
|     uint32_t device_id; | ||||
|     uint32_t stream_id; | ||||
|     uint32_t correlation_id; | ||||
|   }; | ||||
| 
 | ||||
|   virtual ~DeviceTracer() {} | ||||
|   // Needs to be called once before use.
 | ||||
|   virtual void Enable() = 0; | ||||
|   // Needs to be called once after use.
 | ||||
|   virtual void Disable() = 0; | ||||
| 
 | ||||
|   // Add a pair to correlate internal cuda id with high level
 | ||||
|   // annotation (string). So cuda statistics can be represented by
 | ||||
|   // human-readable annotations.
 | ||||
|   virtual void AddAnnotation(uint64_t id, const std::string& anno) = 0; | ||||
| 
 | ||||
|   // Add a cuda kernel stats. `correlation_id` will be mapped to annotation
 | ||||
|   // added before for human readability.
 | ||||
|   virtual void AddKernelRecords(uint64_t start, uint64_t end, | ||||
|                                 uint32_t device_id, uint32_t stream_id, | ||||
|                                 uint32_t correlation_id) = 0; | ||||
| 
 | ||||
|   // Generate a proto after done (Disabled).
 | ||||
|   virtual proto::Profile GenProfile() = 0; | ||||
| 
 | ||||
|   virtual bool IsEnabled() = 0; | ||||
| }; | ||||
| 
 | ||||
| // Get a DeviceTracer.
 | ||||
| DeviceTracer* GetDeviceTracer(); | ||||
| 
 | ||||
| // Set a name for the cuda kernel operation being launched by the thread.
 | ||||
| void SetCurAnnotation(const char* anno); | ||||
| // Clear the name after the operation is done.
 | ||||
| void ClearCurAnnotation(); | ||||
| 
 | ||||
| }  // namespace platform
 | ||||
| }  // namespace paddle
 | ||||
| @ -1,4 +1,8 @@ | ||||
| cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) | ||||
| nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc | ||||
|         DEPS dynamic_loader) | ||||
| 
 | ||||
| # Sources of the dynload_cuda library; cupti.cc is added only when | ||||
| # CUPTI_FOUND is true. | ||||
| list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc) | ||||
| if (CUPTI_FOUND) | ||||
|     list(APPEND CUDA_SRCS cupti.cc) | ||||
| endif(CUPTI_FOUND) | ||||
| nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader) | ||||
| cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) | ||||
|  | ||||
| @ -0,0 +1,35 @@ | ||||
| /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| 
 | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
| 
 | ||||
|     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| 
 | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. */ | ||||
| 
 | ||||
| #ifdef PADDLE_WITH_CUPTI | ||||
| 
 | ||||
| #include "paddle/fluid/platform/dynload/cupti.h" | ||||
| #include "paddle/fluid/platform/enforce.h" | ||||
| 
 | ||||
| namespace paddle { | ||||
| namespace platform { | ||||
| namespace dynload { | ||||
| 
 | ||||
| // Definitions of the once-flag and library handle declared extern in cupti.h. | ||||
| std::once_flag cupti_dso_flag; | ||||
| void *cupti_dso_handle = nullptr; | ||||
| 
 | ||||
| #define DEFINE_WRAP(__name) DynLoad__##__name __name | ||||
| 
 | ||||
| CUPTI_ROUTINE_EACH(DEFINE_WRAP); | ||||
| 
 | ||||
| }  // namespace dynload
 | ||||
| }  // namespace platform
 | ||||
| }  // namespace paddle
 | ||||
| 
 | ||||
| #endif  // PADDLE_WITH_CUPTI
 | ||||
| @ -0,0 +1,86 @@ | ||||
| /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| 
 | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
| 
 | ||||
|     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| 
 | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #ifdef PADDLE_WITH_CUPTI | ||||
| #include <cuda.h> | ||||
| #include <cupti.h> | ||||
| #include <dlfcn.h> | ||||
| #include <mutex> | ||||
| #include "paddle/fluid/platform/dynload/dynamic_loader.h" | ||||
| 
 | ||||
| namespace paddle { | ||||
| namespace platform { | ||||
| namespace dynload { | ||||
| 
 | ||||
| extern std::once_flag cupti_dso_flag; | ||||
| extern void *cupti_dso_handle; | ||||
| 
 | ||||
| /**
 | ||||
|  * The following macro definition can generate structs | ||||
|  * (for each function) to dynamic load cupti routine | ||||
|  * via operator overloading. | ||||
|  * | ||||
|  * note: default dynamic linked libs | ||||
|  */ | ||||
| #ifdef PADDLE_USE_DSO | ||||
| // DSO build: each wrapper resolves its CUPTI entry point with dlsym(). | ||||
| // - cuptiFunc is taken from the real declaration in <cupti.h>, so arguments | ||||
| //   are converted to the parameter types the library declares instead of | ||||
| //   being passed through a signature deduced from the caller's types. | ||||
| // - The symbol is looked up once per wrapper instantiation (function-local | ||||
| //   static) rather than on every call. | ||||
| #define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name)                    \ | ||||
|   struct DynLoad__##__name {                                       \ | ||||
|     template <typename... Args>                                    \ | ||||
|     inline CUptiResult CUPTIAPI operator()(Args... args) {         \ | ||||
|       using cuptiFunc = decltype(&::__name);                       \ | ||||
|       std::call_once(cupti_dso_flag,                               \ | ||||
|                      paddle::platform::dynload::GetCUPTIDsoHandle, \ | ||||
|                      &cupti_dso_handle);                           \ | ||||
|       static void *p_##__name = dlsym(cupti_dso_handle, #__name);  \ | ||||
|       return reinterpret_cast<cuptiFunc>(p_##__name)(args...);     \ | ||||
|     }                                                              \ | ||||
|   };                                                               \ | ||||
|   extern DynLoad__##__name __name | ||||
| #else | ||||
| // Non-DSO build: the wrapper forwards its arguments straight to __name. | ||||
| #define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name)            \ | ||||
|   struct DynLoad__##__name {                               \ | ||||
|     template <typename... Args>                            \ | ||||
|     inline CUptiResult CUPTIAPI operator()(Args... args) { \ | ||||
|       return __name(args...);                              \ | ||||
|     }                                                      \ | ||||
|   };                                                       \ | ||||
|   extern DynLoad__##__name __name | ||||
| #endif | ||||
| 
 | ||||
| // Applies __macro to the name of every CUPTI routine wrapped by this header. | ||||
| #define CUPTI_ROUTINE_EACH(__macro)           \ | ||||
|   __macro(cuptiActivityEnable);               \ | ||||
|   __macro(cuptiActivityDisable);              \ | ||||
|   __macro(cuptiActivityRegisterCallbacks);    \ | ||||
|   __macro(cuptiActivityGetAttribute);         \ | ||||
|   __macro(cuptiActivitySetAttribute);         \ | ||||
|   __macro(cuptiGetTimestamp);                 \ | ||||
|   __macro(cuptiActivityGetNextRecord);        \ | ||||
|   __macro(cuptiGetResultString);              \ | ||||
|   __macro(cuptiActivityGetNumDroppedRecords); \ | ||||
|   __macro(cuptiActivityFlushAll);             \ | ||||
|   __macro(cuptiFinalize);                     \ | ||||
|   __macro(cuptiSubscribe);                    \ | ||||
|   __macro(cuptiUnsubscribe);                  \ | ||||
|   __macro(cuptiEnableCallback); | ||||
| 
 | ||||
| CUPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUPTI_WRAP); | ||||
| 
 | ||||
| #undef DECLARE_DYNAMIC_LOAD_CUPTI_WRAP | ||||
| }  // namespace dynload
 | ||||
| }  // namespace platform
 | ||||
| }  // namespace paddle
 | ||||
| 
 | ||||
| #endif  // PADDLE_WITH_CUPTI
 | ||||
| @ -0,0 +1,30 @@ | ||||
| /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. | ||||
| 
 | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
| 
 | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| 
 | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. */ | ||||
| 
 | ||||
| syntax = "proto2"; | ||||
| package paddle.platform.proto; | ||||
| 
 | ||||
| // One named interval with its device and stream ids. The *_ns fields are | ||||
| // presumably nanosecond timestamps (TODO confirm). Field number 4 is unused. | ||||
| message Event { | ||||
|   optional string name = 1; | ||||
|   optional uint64 start_ns = 2; | ||||
|   optional uint64 end_ns = 3; | ||||
|   optional uint32 device_id = 5; | ||||
|   optional uint32 stream_id = 6; | ||||
| } | ||||
| 
 | ||||
| // A list of Event messages plus an overall start/end pair. | ||||
| message Profile { | ||||
|   repeated Event events = 1; | ||||
|   optional uint64 start_ns = 2; | ||||
|   optional uint64 end_ns = 3; | ||||
| } | ||||
| @ -0,0 +1,46 @@ | ||||
| #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
| 
 | ||||
| import unittest | ||||
| import os | ||||
| import numpy as np | ||||
| import paddle.fluid as fluid | ||||
| import paddle.fluid.profiler as profiler | ||||
| import paddle.fluid.layers as layers | ||||
| import paddle.fluid.core as core | ||||
| 
 | ||||
| 
 | ||||
| class TestNVProf(unittest.TestCase): | ||||
|     def test_nvprof(self): | ||||
|         if not fluid.core.is_compiled_with_cuda(): | ||||
|             return | ||||
|         epoc = 8 | ||||
|         dshape = [4, 3, 28, 28] | ||||
|         data = layers.data(name='data', shape=[3, 28, 28], dtype='float32') | ||||
|         conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) | ||||
| 
 | ||||
|         place = fluid.CUDAPlace(0) | ||||
|         exe = fluid.Executor(place) | ||||
|         exe.run(fluid.default_startup_program()) | ||||
| 
 | ||||
|         output_file = 'cuda_profiler.txt' | ||||
|         with profiler.cuda_profiler(output_file, 'csv') as nvprof: | ||||
|             for i in range(epoc): | ||||
|                 input = np.random.random(dshape).astype('float32') | ||||
|                 exe.run(fluid.default_main_program(), feed={'data': input}) | ||||
|         os.remove(output_file) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
					Loading…
					
					
				
		Reference in new issue