Merge pull request #10366 from chengduoZH/feature/fix_shlf_for_cuda9.0

Fix __shfl and __shfl_down for CUDA9.0
trainerSaveLoadParams
chengduo 7 years ago committed by GitHub
commit 8a071ffb81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -35,6 +35,16 @@ __forceinline__ __device__ T __shfl_sync(unsigned, T val, int src_line,
#define FULL_WARP_MASK 0xFFFFFFFF
#define CREATE_SHFL_MASK(mask, predicate) \
mask = __ballot_sync(FULL_WARP_MASK, (predicate))
template <typename T>
__forceinline__ __device__ T __shfl_down_sync(unsigned mask, T val, int delta) {
return __shfl_down_sync(mask, val, delta);
}
template <typename T>
__forceinline__ __device__ T __shfl_sync(unsigned mask, T val, int src_line,
int width) {
return __shfl_sync(mask, val, src_line, width);
}
#endif
template <typename T>

Loading…
Cancel
Save