parent
2c89d97538
commit
b9ec24c6e9
@ -0,0 +1,41 @@
|
||||
# CUPTI detection is skipped entirely for builds without GPU support:
# nothing below this guard is evaluated when WITH_GPU is false.
if(NOT WITH_GPU)
  return()
endif()
|
||||
|
||||
|
||||
# Root directory probed for CUPTI. Stored in the cache so it can be
# overridden on the command line (-DCUPTI_ROOT=...).
set(CUPTI_ROOT "/usr" CACHE PATH "CUPTI ROOT")

# Locate the directory containing cupti.h. NO_DEFAULT_PATH restricts the
# search to the directories listed here, tried in the order given.
find_path(
  CUPTI_INCLUDE_DIR
  NAMES cupti.h
  PATHS
    ${CUPTI_ROOT}
    ${CUPTI_ROOT}/include
    $ENV{CUPTI_ROOT}
    $ENV{CUPTI_ROOT}/include
    ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/include
  NO_DEFAULT_PATH
)
|
||||
|
||||
# Directory that holds the CUDA runtime library; it is passed to
# find_library() further down as one more place to look for libcupti.
# The expansion is quoted so that an unset or empty CUDA_CUDART_LIBRARY
# produces an empty path instead of a "wrong number of arguments" error.
get_filename_component(__libpath_hist "${CUDA_CUDART_LIBRARY}" PATH)
|
||||
|
||||
# Architecture component of the multiarch library directory
# (lib/<arch>-linux-gnu) that is added to the CUPTI search list below.
# Defaults to x86_64 and is replaced by CMAKE_SYSTEM_PROCESSOR when CMake
# reports one.
#
# The condition names the variable instead of expanding it: the previous
# form, if(NOT ${CMAKE_SYSTEM_PROCESSOR}), substituted the value into the
# condition, where it was then looked up as a variable *name*. That only
# produced the intended result by accident and is ill-formed when the
# processor string is empty.
set(TARGET_ARCH "x86_64")
if(CMAKE_SYSTEM_PROCESSOR)
  set(TARGET_ARCH "${CMAKE_SYSTEM_PROCESSOR}")
endif()
|
||||
|
||||
# Candidate directories for the CUPTI shared library, in search order:
# the cached CUPTI_ROOT, the CUPTI_ROOT environment variable, /usr/lib and
# the CUPTI directory shipped inside the CUDA toolkit.
list(
  APPEND CUPTI_CHECK_LIBRARY_DIRS
  ${CUPTI_ROOT}
  ${CUPTI_ROOT}/lib64
  ${CUPTI_ROOT}/lib
  ${CUPTI_ROOT}/lib/${TARGET_ARCH}-linux-gnu
  $ENV{CUPTI_ROOT}
  $ENV{CUPTI_ROOT}/lib64
  $ENV{CUPTI_ROOT}/lib
  /usr/lib
  ${CUDA_TOOLKIT_ROOT_DIR}/extras/CUPTI/lib64
)

# Only the shared library names are searched; libcupti_static.a is not part
# of the list. NO_DEFAULT_PATH keeps the lookup confined to the directories
# above, the include directory and the CUDA runtime library directory.
find_library(
  CUPTI_LIBRARY
  NAMES libcupti.so libcupti.dylib
  PATHS
    ${CUPTI_CHECK_LIBRARY_DIRS}
    ${CUPTI_INCLUDE_DIR}
    ${__libpath_hist}
  NO_DEFAULT_PATH
  DOC "Path to cuPTI library."
)
|
||||
|
||||
# Directory part of the located library (computed whether or not the
# library was actually found).
get_filename_component(CUPTI_LIBRARY_PATH ${CUPTI_LIBRARY} DIRECTORY)

# CUPTI counts as found only when both the header directory and the
# library were resolved; start from OFF and flip to ON in that case.
set(CUPTI_FOUND OFF)
if(CUPTI_INCLUDE_DIR AND CUPTI_LIBRARY)
  set(CUPTI_FOUND ON)
endif()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,72 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
#include "paddle/fluid/platform/dynload/cupti.h"
|
||||
#include "paddle/fluid/platform/profiler.pb.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
|
||||
///////////////////////
|
||||
// WARN: Under Development. Don't depend on it yet.
|
||||
//////////////////////
|
||||
|
||||
// DeviceTracer performs the following tasks:
|
||||
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.
|
||||
// 2. Collect cuda statistics: start/end ts, memory, etc.
|
||||
// 3. Generate a protobuf for further analysis.
|
||||
class DeviceTracer {
|
||||
public:
|
||||
struct KernelRecord {
|
||||
uint64_t start_ns;
|
||||
uint64_t end_ns;
|
||||
uint32_t device_id;
|
||||
uint32_t stream_id;
|
||||
uint32_t correlation_id;
|
||||
};
|
||||
|
||||
virtual ~DeviceTracer() {}
|
||||
// Needs to be called once before use.
|
||||
virtual void Enable() = 0;
|
||||
// Needs to be called once after use.
|
||||
virtual void Disable() = 0;
|
||||
|
||||
// Add a pair to correlate internal cuda id with high level
|
||||
// annotation (string). So cuda statistics can be represented by
|
||||
// human-readable annotations.
|
||||
virtual void AddAnnotation(uint64_t id, const std::string& anno) = 0;
|
||||
|
||||
// Add a cuda kernel stats. `correlation_id` will be mapped to annotation
|
||||
// added before for human readability.
|
||||
virtual void AddKernelRecords(uint64_t start, uint64_t end,
|
||||
uint32_t device_id, uint32_t stream_id,
|
||||
uint32_t correlation_id) = 0;
|
||||
|
||||
// Generate a proto after done (Disabled).
|
||||
virtual proto::Profile GenProfile() = 0;
|
||||
|
||||
virtual bool IsEnabled() = 0;
|
||||
};
|
||||
|
||||
// Get a DeviceTracer.
|
||||
DeviceTracer* GetDeviceTracer();
|
||||
|
||||
// Set a name for the cuda kernel operation being launched by the thread.
|
||||
void SetCurAnnotation(const char* anno);
|
||||
// Clear the name after the operation is done.
|
||||
void ClearCurAnnotation();
|
||||
|
||||
} // namespace platform
|
||||
} // namespace paddle
|
@ -1,4 +1,8 @@
|
||||
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)

# Sources of the CUDA dynamic-loading wrappers. cupti.cc is compiled in only
# when the CUPTI detection step reported CUPTI_FOUND.
list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc)
if(CUPTI_FOUND)
  list(APPEND CUDA_SRCS cupti.cc)
endif()

# dynload_cuda is declared exactly once, from the assembled source list.
# A second nv_library(dynload_cuda ...) with the fixed four-file list would
# define the same target twice, which CMake rejects.
nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader)

cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
|
||||
|
@ -0,0 +1,35 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#ifdef PADDLE_WITH_CUPTI
|
||||
|
||||
#include "paddle/fluid/platform/dynload/cupti.h"
|
||||
#include "paddle/fluid/platform/enforce.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
namespace dynload {
|
||||
|
||||
std::once_flag cupti_dso_flag;
|
||||
void *cupti_dso_handle = nullptr;
|
||||
|
||||
#define DEFINE_WRAP(__name) DynLoad__##__name __name
|
||||
|
||||
CUPTI_ROUTINE_EACH(DEFINE_WRAP);
|
||||
|
||||
} // namespace dynload
|
||||
} // namespace platform
|
||||
} // namespace paddle
|
||||
|
||||
#endif // PADDLE_WITH_CUPTI
|
@ -0,0 +1,86 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef PADDLE_WITH_CUPTI
|
||||
#include <cuda.h>
|
||||
#include <cupti.h>
|
||||
#include <dlfcn.h>
|
||||
#include <mutex>
|
||||
#include "paddle/fluid/platform/dynload/dynamic_loader.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
namespace dynload {
|
||||
|
||||
// Flag handed to std::call_once by the DSO wrappers below.
extern std::once_flag cupti_dso_flag;

// Handle the DSO wrappers below pass to dlsym.
extern void *cupti_dso_handle;
|
||||
|
||||
/**
|
||||
* The following macro definition can generate structs
|
||||
* (for each function) to dynamic load cupti routine
|
||||
* via operator overloading.
|
||||
*
|
||||
* note: default dynamic linked libs
|
||||
*/
|
||||
#ifdef PADDLE_USE_DSO
// DSO flavour: DynLoad__<name>::operator() resolves <name> in the CUPTI
// shared object and forwards the call to it.
//
//  * The function-pointer type is taken from the real prototype in
//    <cupti.h> (decltype(&::name)) rather than rebuilt from the types of the
//    arguments at the call site, so the call always goes through a pointer
//    of the routine's actual signature.
//  * The symbol is looked up once per instantiation (function-local static)
//    instead of on every call.
//  * A symbol that cannot be resolved yields CUPTI_ERROR_UNKNOWN instead of
//    a call through a null pointer.
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name)                      \
  struct DynLoad__##__name {                                         \
    template <typename... Args>                                      \
    inline CUptiResult CUPTIAPI operator()(Args... args) {           \
      using cuptiFunc = decltype(&::__name);                         \
      std::call_once(cupti_dso_flag,                                 \
                     paddle::platform::dynload::GetCUPTIDsoHandle,   \
                     &cupti_dso_handle);                             \
      static void *p_##__name = dlsym(cupti_dso_handle, #__name);    \
      if (p_##__name == nullptr) {                                   \
        return CUPTI_ERROR_UNKNOWN;                                  \
      }                                                              \
      return reinterpret_cast<cuptiFunc>(p_##__name)(args...);       \
    }                                                                \
  };                                                                 \
  extern DynLoad__##__name __name
#else
// Direct-link flavour: the wrapper forwards straight to the CUPTI routine.
// The call is qualified with `::` so it always names the global CUPTI
// function and never the wrapper object of the same name declared in this
// namespace.
#define DECLARE_DYNAMIC_LOAD_CUPTI_WRAP(__name)            \
  struct DynLoad__##__name {                               \
    template <typename... Args>                            \
    inline CUptiResult CUPTIAPI operator()(Args... args) { \
      return ::__name(args...);                            \
    }                                                      \
  };                                                       \
  extern DynLoad__##__name __name
#endif
|
||||
|
||||
#define CUPTI_ROUTINE_EACH(__macro) \
|
||||
__macro(cuptiActivityEnable); \
|
||||
__macro(cuptiActivityDisable); \
|
||||
__macro(cuptiActivityRegisterCallbacks); \
|
||||
__macro(cuptiActivityGetAttribute); \
|
||||
__macro(cuptiActivitySetAttribute); \
|
||||
__macro(cuptiGetTimestamp); \
|
||||
__macro(cuptiActivityGetNextRecord); \
|
||||
__macro(cuptiGetResultString); \
|
||||
__macro(cuptiActivityGetNumDroppedRecords); \
|
||||
__macro(cuptiActivityFlushAll); \
|
||||
__macro(cuptiFinalize); \
|
||||
__macro(cuptiSubscribe); \
|
||||
__macro(cuptiUnsubscribe); \
|
||||
__macro(cuptiEnableCallback);
|
||||
|
||||
CUPTI_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUPTI_WRAP);
|
||||
|
||||
#undef DECLARE_DYNAMIC_LOAD_CUPTI_WRAP
|
||||
} // namespace dynload
|
||||
} // namespace platform
|
||||
} // namespace paddle
|
||||
|
||||
#endif // PADDLE_WITH_CUPTI
|
@ -0,0 +1,30 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
syntax = "proto2";
|
||||
package paddle.platform.proto;
|
||||
|
||||
// One traced event.
// NOTE(review): the `_ns` suffix suggests nanosecond timestamps -- confirm
// against the producer. Field number 4 is not used by this message.
message Event {
  optional string name = 1;
  optional uint64 start_ns = 2;
  optional uint64 end_ns = 3;
  optional uint32 device_id = 5;
  optional uint32 stream_id = 6;
}
|
||||
|
||||
// A collection of events together with an overall start/end pair.
// NOTE(review): presumably start_ns/end_ns bound the whole profiling
// session -- confirm against the producer.
message Profile {
  repeated Event events = 1;
  optional uint64 start_ns = 2;
  optional uint64 end_ns = 3;
}
|
@ -0,0 +1,46 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
import os
|
||||
import numpy as np
|
||||
import paddle.v2.fluid as fluid
|
||||
import paddle.v2.fluid.profiler as profiler
|
||||
import paddle.v2.fluid.layers as layers
|
||||
import paddle.v2.fluid.core as core
|
||||
|
||||
|
||||
class TestNVProf(unittest.TestCase):
|
||||
def test_nvprof(self):
|
||||
if not fluid.core.is_compiled_with_cuda():
|
||||
return
|
||||
epoc = 8
|
||||
dshape = [4, 3, 28, 28]
|
||||
data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
|
||||
conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
|
||||
|
||||
place = fluid.CUDAPlace(0)
|
||||
exe = fluid.Executor(place)
|
||||
exe.run(fluid.default_startup_program())
|
||||
|
||||
output_file = 'cuda_profiler.txt'
|
||||
with profiler.cuda_profiler(output_file, 'csv') as nvprof:
|
||||
for i in range(epoc):
|
||||
input = np.random.random(dshape).astype('float32')
|
||||
exe.run(fluid.default_main_program(), feed={'data': input})
|
||||
os.remove(output_file)
|
||||
|
||||
|
||||
# Run the test cases when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|
Loading…
Reference in new issue