@@ -134,7 +134,26 @@ USE_CUDA_ATOMIC(Max, int);
USE_CUDA_ATOMIC(Max, unsigned int);
// CUDA API uses unsigned long long int, so we cannot use uint64_t here.
// This is because unsigned long long int is not necessarily uint64_t.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
USE_CUDA_ATOMIC(Max, unsigned long long int); // NOLINT
#else
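// For devices without a native 64-bit atomicMax (compute capability < 3.5),
// emulate it with an atomicCAS retry loop; the early checks below skip the
// CAS once the stored value is already >= val.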
CUDA_ATOMIC_WRAPPER(Max, unsigned long long int) {
  if (*address >= val) {
    return;
  }

  unsigned long long int old = *address, assumed;
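  // Keep retrying atomicCAS until val is stored or another thread has
  // already stored a value >= val.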
  do {
    assumed = old;
    if (assumed >= val) {
      break;
    }

    old = atomicCAS(address, assumed, val);
  } while (assumed != old);
}
#endif

CUDA_ATOMIC_WRAPPER(Max, int64_t) {
  // Here, we check that long long int is the same as int64_t.
@@ -187,7 +206,26 @@ USE_CUDA_ATOMIC(Min, int);
USE_CUDA_ATOMIC(Min, unsigned int);
// CUDA API uses unsigned long long int, so we cannot use uint64_t here.
// This is because unsigned long long int is not necessarily uint64_t.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
USE_CUDA_ATOMIC(Min, unsigned long long int); // NOLINT
#else
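// For devices without a native 64-bit atomicMin (compute capability < 3.5),
// emulate it with an atomicCAS retry loop; the early checks below skip the
// CAS once the stored value is already <= val.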
CUDA_ATOMIC_WRAPPER(Min, unsigned long long int) {
  if (*address <= val) {
    return;
  }

  unsigned long long int old = *address, assumed;
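  // Keep retrying atomicCAS until val is stored or another thread has
  // already stored a value <= val.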
  do {
    assumed = old;
    if (assumed <= val) {
      break;
    }

    old = atomicCAS(address, assumed, val);
  } while (assumed != old);
}
#endif

CUDA_ATOMIC_WRAPPER(Min, int64_t) {
  // Here, we check that long long int is the same as int64_t.