Fix compile on CUDA9.1 & MacOS (#6642)

del_some_in_makelist
Authored by Yu Yang 7 years ago, committed by GitHub
parent 8a24915d39
commit d5cab4f07c

@@ -79,7 +79,7 @@ public:
#ifdef PADDLE_CUDA_FP16
HOSTDEVICE inline explicit float16(const half& h) {
#if CUDA_VERSION >= 9000
- x = reinterpret_cast<__half_raw*>(&h)->x;
+ x = reinterpret_cast<__half_raw*>(const_cast<half*>(&h))->x;
#else
x = h.x;
#endif // CUDA_VERSION >= 9000
@@ -145,7 +145,7 @@ public:
#ifdef PADDLE_CUDA_FP16
HOSTDEVICE inline float16& operator=(const half& rhs) {
#if CUDA_VERSION >= 9000
- x = reinterpret_cast<__half_raw*>(&rhs)->x;
+ x = reinterpret_cast<__half_raw*>(const_cast<half*>(&rhs))->x;
#else
x = rhs.x;
#endif
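The two float16 hunks above apply the same fix to the half-conversion constructor and to operator=. Since CUDA 9, `half` no longer exposes its storage directly, so the bits are read through `__half_raw`; but reinterpret_cast is not allowed to cast away constness, so casting `&h` (a `const half*`) straight to `__half_raw*` is ill-formed, and the fix routes through `const_cast<half*>` first. A minimal standalone sketch of the pattern; the helper name to_raw_bits is hypothetical and not part of the Paddle source:

#include <cuda.h>
#include <cuda_fp16.h>
#include <cstdint>

// Hypothetical helper illustrating the conversion above; not Paddle code.
inline std::uint16_t to_raw_bits(const half& h) {
#if CUDA_VERSION >= 9000
  // CUDA 9+: read the bit pattern through __half_raw's public member `x`.
  // const_cast is needed because reinterpret_cast alone cannot drop const.
  return reinterpret_cast<__half_raw*>(const_cast<half*>(&h))->x;
#else
  // Before CUDA 9, `half` stores its bits in a public member `x`.
  return h.x;
#endif
}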

@@ -25,6 +25,11 @@ void *nccl_dso_handle;
NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
+ void LoadNCCLDSO() {
+   platform::call_once(nccl_dso_flag,
+                       [] { GetNCCLDsoHandle(&nccl_dso_handle); });
+ }
} // namespace dynload
} // namespace platform
} // namespace paddle
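The nccl.cc hunk above introduces LoadNCCLDSO as the single place where the NCCL shared library gets loaded: the call_once, the once-flag, and GetNCCLDsoHandle's out-parameter stay behind one ordinary function that the header macro below can simply call. A self-contained sketch of the same load-once-then-dlsym idea using std::call_once and dlopen directly; the namespace demo, the names LoadLib/Symbol, and the library path are illustrative, not Paddle's:

#include <dlfcn.h>
#include <mutex>

namespace demo {
std::once_flag lib_flag;
void* lib_handle = nullptr;

// The lambda runs at most once, no matter how many threads race to call this.
void LoadLib() {
  std::call_once(lib_flag, [] { lib_handle = dlopen("libnccl.so", RTLD_LAZY); });
}

// Resolve a symbol after making sure the library has been loaded.
void* Symbol(const char* name) {
  LoadLib();
  return lib_handle ? dlsym(lib_handle, name) : nullptr;
}
}  // namespace demo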

@@ -28,18 +28,18 @@ extern std::once_flag nccl_dso_flag;
extern void* nccl_dso_handle;
#ifdef PADDLE_USE_DSO
- #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
-   struct DynLoad__##__name { \
-     template <typename... Args> \
-     auto operator()(Args... args) -> decltype(__name(args...)) { \
-       using nccl_func = decltype(__name(args...)) (*)(Args...); \
-       platform::call_once(nccl_dso_flag, \
-                           paddle::platform::dynload::GetNCCLDsoHandle, \
-                           &nccl_dso_handle); \
-       void* p_##__name = dlsym(nccl_dso_handle, #__name); \
-       return reinterpret_cast<nccl_func>(p_##__name)(args...); \
-     } \
-   }; \
+ extern void LoadNCCLDSO();
+ #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
+   struct DynLoad__##__name { \
+     template <typename... Args> \
+     auto operator()(Args... args) -> decltype(__name(args...)) { \
+       using nccl_func = decltype(__name(args...)) (*)(Args...); \
+       paddle::platform::dynload::LoadNCCLDSO(); \
+       void* p_##__name = dlsym(nccl_dso_handle, #__name); \
+       return reinterpret_cast<nccl_func>(p_##__name)(args...); \
+     } \
+   }; \
extern DynLoad__##__name __name
#else
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
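With LoadNCCLDSO declared extern in the header, each DECLARE_DYNAMIC_LOAD_NCCL_WRAP expansion no longer spells out the call_once itself; its body reduces to "ensure the library is loaded, dlsym the symbol, forward the arguments". Hand-expanding the new macro for one routine gives roughly the following (formatting simplified; the trailing semicolon comes from the use site):

struct DynLoad__ncclCommInitAll {
  template <typename... Args>
  auto operator()(Args... args) -> decltype(ncclCommInitAll(args...)) {
    using nccl_func = decltype(ncclCommInitAll(args...)) (*)(Args...);
    paddle::platform::dynload::LoadNCCLDSO();  // load libnccl at most once
    void* p_ncclCommInitAll = dlsym(nccl_dso_handle, "ncclCommInitAll");
    return reinterpret_cast<nccl_func>(p_ncclCommInitAll)(args...);
  }
};
extern DynLoad__ncclCommInitAll ncclCommInitAll;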

@@ -31,7 +31,7 @@ namespace platform {
TEST(NCCL, init) {
std::vector<ncclComm_t> comms;
comms.resize(dev_count);
- PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr));
+ dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
for (int i = 0; i < dev_count; ++i) {
dynload::ncclCommDestroy(comms[i]);
}
@@ -62,7 +62,7 @@ TEST(NCCL, all_reduce) {
std::vector<ncclComm_t> comms;
comms.resize(dev_count);
VLOG(1) << "Initializing ncclComm";
- PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr));
+ dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
VLOG(1) << "ncclComm initialized";
VLOG(1) << "Creating thread data";
std::vector<std::unique_ptr<PerThreadData<double>>> data;
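Both test hunks drop the PADDLE_ENFORCE wrapper and call ncclCommInitAll directly, discarding its return status. If a check were still wanted without that macro, comparing the returned ncclResult_t explicitly inside the TEST body would look roughly like this sketch; it is illustrative only, not what the commit does, and the dynload::ncclGetErrorString spelling is an assumption (the function itself is standard NCCL API):

ncclResult_t st = dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
if (st != ncclSuccess) {
  // gtest's FAIL() aborts the current test with a message.
  FAIL() << "ncclCommInitAll failed: " << dynload::ncclGetErrorString(st);
}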

@@ -14,6 +14,19 @@
#pragma once
+ #ifdef __CUDACC__
+ #ifdef __CUDACC_VER_MAJOR__
+ // CUDA 9 defines `__CUDACC_VER__` as a warning message; manually define
+ // __CUDACC_VER__ instead.
+ #undef __CUDACC_VER__
+ #define __CUDACC_VER__ \
+   (__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 + \
+    __CUDACC_VER_BUILD__)
+ #endif
+ #endif
#include <boost/config.hpp>
#ifdef PADDLE_WITH_CUDA
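The final C++ hunk works around NVCC 9 dropping __CUDACC_VER__ as a usable number (as the in-diff comment notes, it now expands to a warning message), while the Boost headers included just below still appear to reference it numerically; the old single-number encoding is therefore rebuilt from the MAJOR/MINOR/BUILD macros. As a worked example, CUDA 9.1 with a build number of, say, 85 (illustrative; the build component varies per toolkit release) encodes as 9*10000 + 1*100 + 85 = 90185. A standalone sketch of the same encoding with stand-in macro names so it compiles anywhere:

#include <cstdio>

// Stand-ins for __CUDACC_VER_MAJOR__/__CUDACC_VER_MINOR__/__CUDACC_VER_BUILD__;
// the values are only an example.
#define DEMO_VER_MAJOR 9
#define DEMO_VER_MINOR 1
#define DEMO_VER_BUILD 85
#define DEMO_CUDACC_VER \
  (DEMO_VER_MAJOR * 10000 + DEMO_VER_MINOR * 100 + DEMO_VER_BUILD)

int main() {
  std::printf("%d\n", DEMO_CUDACC_VER);  // prints 90185
  return 0;
}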

python/.gitignore

@@ -2,6 +2,7 @@
build
dist
paddle.egg-info
+ paddlepaddle_gpu.egg-info
.idea
paddle/proto/*.py
paddle/proto/*.pyc
