parent
2c89d97538
commit
b9ec24c6e9
@ -0,0 +1,41 @@
|
|||||||
|
# Locate the CUPTI header (cupti.h) and shared library (libcupti).
#
# Inputs read by this script:
#   WITH_GPU               - when false the script returns without searching.
#   CUPTI_ROOT             - cache path (default "/usr") and environment
#                            variable of the same name; both are searched.
#   CUDA_TOOLKIT_ROOT_DIR  - its extras/CUPTI sub-tree is searched as well.
#   CUDA_CUDART_LIBRARY    - its directory is used as an extra library hint.
#   CMAKE_SYSTEM_PROCESSOR - selects the multi-arch lib/<arch>-linux-gnu dir.
#
# Variables set by this script:
#   CUPTI_INCLUDE_DIR  - directory containing cupti.h (find_path result).
#   CUPTI_LIBRARY      - full path of the CUPTI library (find_library result).
#   CUPTI_LIBRARY_PATH - directory part of CUPTI_LIBRARY.
#   CUPTI_FOUND        - ON when both the header and the library were found.
if(NOT WITH_GPU)
  return()
endif()

set(CUPTI_ROOT "/usr" CACHE PATH "CUPTI ROOT")
find_path(CUPTI_INCLUDE_DIR cupti.h
  PATHS ${CUPTI_ROOT} ${CUPTI_ROOT}/include
        $ENV{CUPTI_ROOT} $ENV{CUPTI_ROOT}/include
        ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include
  NO_DEFAULT_PATH
)

# Quoted: an empty CUDA_CUDART_LIBRARY must still be passed as one argument,
# otherwise get_filename_component() is called with too few arguments.
get_filename_component(__libpath_hist "${CUDA_CUDART_LIBRARY}" PATH)

# Default to x86_64 and override with the detected processor when one is set.
# The variable NAME is tested on purpose: if(NOT ${CMAKE_SYSTEM_PROCESSOR})
# would instead look up a variable called e.g. "x86_64".
set(TARGET_ARCH "x86_64")
if(CMAKE_SYSTEM_PROCESSOR)
  set(TARGET_ARCH "${CMAKE_SYSTEM_PROCESSOR}")
endif()

list(APPEND CUPTI_CHECK_LIBRARY_DIRS
  ${CUPTI_ROOT}
  ${CUPTI_ROOT}/lib64
  ${CUPTI_ROOT}/lib
  ${CUPTI_ROOT}/lib/${TARGET_ARCH}-linux-gnu
  $ENV{CUPTI_ROOT}
  $ENV{CUPTI_ROOT}/lib64
  $ENV{CUPTI_ROOT}/lib
  /usr/lib
  ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/lib64)
find_library(CUPTI_LIBRARY NAMES libcupti.so libcupti.dylib # libcupti_static.a
  PATHS ${CUPTI_CHECK_LIBRARY_DIRS} ${CUPTI_INCLUDE_DIR} ${__libpath_hist}
  NO_DEFAULT_PATH
  DOC "Path to cuPTI library.")

get_filename_component(CUPTI_LIBRARY_PATH "${CUPTI_LIBRARY}" DIRECTORY)
if(CUPTI_INCLUDE_DIR AND CUPTI_LIBRARY)
  set(CUPTI_FOUND ON)
else()
  set(CUPTI_FOUND OFF)
endif()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,72 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include <stdint.h>
#include <string>

#include "paddle/fluid/platform/dynload/cupti.h"
#include "paddle/fluid/platform/profiler.pb.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace platform {
|
||||||
|
|
||||||
|
///////////////////////
|
||||||
|
// WARN: Under Development. Don't depend on it yet.
|
||||||
|
//////////////////////
|
||||||
|
|
||||||
|
// DeviceTracer performs the following tasks:
|
||||||
|
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.
|
||||||
|
// 2. Collect cuda statistics: start/end ts, memory, etc.
|
||||||
|
// 3. Generate a protobuf for further analysis.
|
||||||
|
class DeviceTracer {
|
||||||
|
public:
|
||||||
|
struct KernelRecord {
|
||||||
|
uint64_t start_ns;
|
||||||
|
uint64_t end_ns;
|
||||||
|
uint32_t device_id;
|
||||||
|
uint32_t stream_id;
|
||||||
|
uint32_t correlation_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
virtual ~DeviceTracer() {}
|
||||||
|
// Needs to be called once before use.
|
||||||
|
virtual void Enable() = 0;
|
||||||
|
// Needs to be called once after use.
|
||||||
|
virtual void Disable() = 0;
|
||||||
|
|
||||||
|
// Add a pair to correlate internal cuda id with high level
|
||||||
|
// annotation (string). So cuda statistics can be represented by
|
||||||
|
// human-readable annotations.
|
||||||
|
virtual void AddAnnotation(uint64_t id, const std::string& anno) = 0;
|
||||||
|
|
||||||
|
// Add a cuda kernel stats. `correlation_id` will be mapped to annotation
|
||||||
|
// added before for human readability.
|
||||||
|
virtual void AddKernelRecords(uint64_t start, uint64_t end,
|
||||||
|
uint32_t device_id, uint32_t stream_id,
|
||||||
|
uint32_t correlation_id) = 0;
|
||||||
|
|
||||||
|
// Generate a proto after done (Disabled).
|
||||||
|
virtual proto::Profile GenProfile() = 0;
|
||||||
|
|
||||||
|
virtual bool IsEnabled() = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get a DeviceTracer.
|
||||||
|
DeviceTracer* GetDeviceTracer();
|
||||||
|
|
||||||
|
// Set a name for the cuda kernel operation being launched by the thread.
|
||||||
|
void SetCurAnnotation(const char* anno);
|
||||||
|
// Clear the name after the operation is done.
|
||||||
|
void ClearCurAnnotation();
|
||||||
|
|
||||||
|
} // namespace platform
|
||||||
|
} // namespace paddle
|
@ -1,4 +1,8 @@
|
|||||||
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
|
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
|
||||||
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc
|
|
||||||
DEPS dynamic_loader)
|
# Sources of the CUDA dynamic-loading wrappers. The CUPTI wrapper is added
# only when CUPTI_FOUND is true.
list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc)
if(CUPTI_FOUND)
  list(APPEND CUDA_SRCS cupti.cc)
endif()
nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader)
|
||||||
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
|
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
|
||||||
|
@ -0,0 +1,35 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#ifdef PADDLE_WITH_CUPTI
|
||||||
|
|
||||||
|
#include "paddle/fluid/platform/dynload/cupti.h"
|
||||||
|
#include "paddle/fluid/platform/enforce.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace platform {
|
||||||
|
namespace dynload {
|
||||||
|
|
||||||
|
std::once_flag cupti_dso_flag;
|
||||||
|
void *cupti_dso_handle = nullptr;
|
||||||
|
|
||||||
|
#define DEFINE_WRAP(__name) DynLoad__##__name __name
|
||||||
|
|
||||||
|
CUPTI_ROUTINE_EACH(DEFINE_WRAP);
|
||||||
|
|
||||||
|
} // namespace dynload
|
||||||
|
} // namespace platform
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
#endif // PADDLE_WITH_CUPTI
|
@ -0,0 +1,86 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef PADDLE_WITH_CUPTI
|
||||||
|
#include <cuda.h>
|
||||||
|
#include <cupti.h>
|
||||||
|
#include <dlfcn.h>
|
||||||
|
#include <mutex>
|
||||||
|
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace platform {
|
||||||
|
namespace dynload {
|
||||||
|
|
||||||
|
extern std::once_flag cupti_dso_flag;
|
||||||
|
extern void *cupti_dso_handle;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The following macro definition can generate structs
|
||||||
|
* (for each function) to dynamic load cupti routine
|
||||||
|
* via operator overloading.
|
||||||
|
*
|
||||||
|
* note: default dynamic linked libs
|
||||||
|
*/
|
||||||
|
#ifdef PADDLE_USE_DSO
|
||||||
|
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
|
||||||
|
struct DynLoad__##__name { \
|
||||||
|
template <typename... Args> \
|
||||||
|
inline CUptiResult CUPTIAPI operator()(Args... args) { \
|
||||||
|
typedef CUptiResult CUPTIAPI (*cuptiFunc)(Args...); \
|
||||||
|
std::call_once(cupti_dso_flag, \
|
||||||
|
paddle::platform::dynload::GetCUPTIDsoHandle, \
|
||||||
|
&cupti_dso_handle); \
|
||||||
|
void *p_##__name = dlsym(cupti_dso_handle, #__name); \
|
||||||
|
return reinterpret_cast<cuptiFunc>(p_##__name)(args...); \
|
||||||
|
} \
|
||||||
|
}; \
|
||||||
|
extern DynLoad__##__name __name
|
||||||
|
#else
|
||||||
|
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name) \
|
||||||
|
struct DynLoad__##__name { \
|
||||||
|
template <typename... Args> \
|
||||||
|
inline CUptiResult CUPTIAPI operator()(Args... args) { \
|
||||||
|
return __name(args...); \
|
||||||
|
} \
|
||||||
|
}; \
|
||||||
|
extern DynLoad__##__name __name
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define CUPTI_ROUTINE_EACH(__macro) \
|
||||||
|
__macro(cuptiActivityEnable); \
|
||||||
|
__macro(cuptiActivityDisable); \
|
||||||
|
__macro(cuptiActivityRegisterCallbacks); \
|
||||||
|
__macro(cuptiActivityGetAttribute); \
|
||||||
|
__macro(cuptiActivitySetAttribute); \
|
||||||
|
__macro(cuptiGetTimestamp); \
|
||||||
|
__macro(cuptiActivityGetNextRecord); \
|
||||||
|
__macro(cuptiGetResultString); \
|
||||||
|
__macro(cuptiActivityGetNumDroppedRecords); \
|
||||||
|
__macro(cuptiActivityFlushAll); \
|
||||||
|
__macro(cuptiFinalize); \
|
||||||
|
__macro(cuptiSubscribe); \
|
||||||
|
__macro(cuptiUnsubscribe); \
|
||||||
|
__macro(cuptiEnableCallback);
|
||||||
|
|
||||||
|
CUPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUPTI_WRAP);
|
||||||
|
|
||||||
|
#undef DECLARE_DYNAMIC_LOAD_CUPTI_WRAP
|
||||||
|
} // namespace dynload
|
||||||
|
} // namespace platform
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
#endif // PADDLE_WITH_CUPTI
|
@ -0,0 +1,30 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
syntax = "proto2";
|
||||||
|
package paddle.platform.proto;
|
||||||
|
|
||||||
|
// A single named, timed event. Besides the name it carries the same
// start/end/device/stream fields as DeviceTracer::KernelRecord.
// Field number 4 is not used by this message.
message Event {
  optional string name = 1;
  optional uint64 start_ns = 2;
  optional uint64 end_ns = 3;
  optional uint32 device_id = 5;
  optional uint32 stream_id = 6;
}
|
||||||
|
|
||||||
|
// A collection of events together with an overall start/end timestamp pair.
message Profile {
  repeated Event events = 1;
  optional uint64 start_ns = 2;
  optional uint64 end_ns = 3;
}
|
@ -0,0 +1,46 @@
|
|||||||
|
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import unittest
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import paddle.v2.fluid as fluid
|
||||||
|
import paddle.v2.fluid.profiler as profiler
|
||||||
|
import paddle.v2.fluid.layers as layers
|
||||||
|
import paddle.v2.fluid.core as core
|
||||||
|
|
||||||
|
|
||||||
|
class TestNVProf(unittest.TestCase):
    """Smoke test for the ``profiler.cuda_profiler`` context manager."""

    def test_nvprof(self):
        """Run a small conv2d program under the CUDA profiler.

        The test is skipped when the build has no CUDA support. It passes
        when the profiled runs complete and the profiler output file exists
        afterwards; the file is removed even if a run fails.
        """
        if not fluid.core.is_compiled_with_cuda():
            self.skipTest('PaddlePaddle is not compiled with CUDA')

        epoch_count = 8
        batch_shape = [4, 3, 28, 28]
        data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
        # The returned variable is not needed; the call is kept because it is
        # presumably what adds the conv2d op to the default main program.
        layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        output_file = 'cuda_profiler.txt'
        try:
            with profiler.cuda_profiler(output_file, 'csv'):
                for _ in range(epoch_count):
                    batch = np.random.random(batch_shape).astype('float32')
                    exe.run(fluid.default_main_program(),
                            feed={'data': batch})
            # Previously this was only checked implicitly by os.remove()
            # raising on a missing file.
            self.assertTrue(
                os.path.exists(output_file),
                'profiler did not produce %s' % output_file)
        finally:
            # Do not leave the output behind in the working directory.
            if os.path.exists(output_file):
                os.remove(output_file)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
Loading…
Reference in new issue