|
|
|
@ -484,72 +484,107 @@ DEVICE inline bool operator>=(const half& a, const half& b) {
|
|
|
|
|
#endif // PADDLE_CUDA_FP16
|
|
|
|
|
|
|
|
|
|
// Arithmetic operators for float16 on GPU
|
|
|
|
|
#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
|
|
|
|
|
DEVICE inline float16 operator+(const float16& a, const float16& b) {
|
|
|
|
|
#if defined(PADDLE_CUDA_FP16)
|
|
|
|
|
// Adds two float16 values.
// When __CUDA_ARCH__ is defined and >= 530 the __hadd half intrinsic is used;
// otherwise both operands are converted to float, added, and the sum is
// converted back to float16.
HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hadd(half(a), half(b)));
#else
  return float16(float(a) + float(b));
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16 operator-(const float16& a, const float16& b) {
|
|
|
|
|
// Subtracts b from a.
// When __CUDA_ARCH__ is defined and >= 530 the __hsub half intrinsic is used;
// otherwise both operands are converted to float, subtracted, and the
// difference is converted back to float16.
HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hsub(half(a), half(b)));
#else
  return float16(float(a) - float(b));
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16 operator*(const float16& a, const float16& b) {
|
|
|
|
|
// Multiplies two float16 values.
// When __CUDA_ARCH__ is defined and >= 530 the __hmul half intrinsic is used;
// otherwise both operands are converted to float, multiplied, and the
// product is converted back to float16.
HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hmul(half(a), half(b)));
#else
  return float16(float(a) * float(b));
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16 operator/(const float16& a, const float16& b) {
|
|
|
|
|
// TODO(kexinzhao): check the cuda version that starts to support __hdiv
|
|
|
|
|
// Divides a by b.
// When __CUDA_ARCH__ is defined and >= 300 the operands are widened with
// __half2float and divided as floats; otherwise the float conversion of
// float16 is used. Both branches convert the quotient back to float16.
HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
  // TODO(kexinzhao): check which cuda version starts to support __hdiv
  float num = __half2float(half(a));
  float denom = __half2float(half(b));
  return float16(num / denom);
#else
  return float16(float(a) / float(b));
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16 operator-(const float16& a) {
|
|
|
|
|
// Unary negation of a float16 value.
// When __CUDA_ARCH__ is defined and >= 530 the __hneg half intrinsic is used;
// otherwise the result is built by toggling bit 0x8000 of the raw member x.
HOSTDEVICE inline float16 operator-(const float16& a) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return float16(__hneg(half(a)));
#else
  float16 res;
  res.x = a.x ^ 0x8000;  // flip the top bit of the 16-bit pattern
  return res;
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16& operator+=(float16& a, const float16& b) {
|
|
|
|
|
// In-place addition: stores a + b into a and returns a reference to a.
HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
  const float16 sum = a + b;
  a = sum;
  return a;
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16& operator-=(float16& a, const float16& b) {
|
|
|
|
|
// In-place subtraction: stores a - b into a and returns a reference to a.
HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
  const float16 difference = a - b;
  a = difference;
  return a;
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16& operator*=(float16& a, const float16& b) {
|
|
|
|
|
// In-place multiplication: stores a * b into a and returns a reference to a.
HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
  const float16 product = a * b;
  a = product;
  return a;
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline float16& operator/=(float16& a, const float16& b) {
|
|
|
|
|
// In-place division: stores a / b into a and returns a reference to a.
HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
  const float16 quotient = a / b;
  a = quotient;
  return a;
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline bool operator==(const float16& a, const float16& b) {
|
|
|
|
|
// Equality comparison for float16.
// When __CUDA_ARCH__ is defined and >= 530 the __heq half intrinsic is used;
// otherwise both operands are converted to float and compared.
HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __heq(half(a), half(b));
#else
  return float(a) == float(b);
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline bool operator!=(const float16& a, const float16& b) {
|
|
|
|
|
// Inequality comparison for float16.
// When __CUDA_ARCH__ is defined and >= 530 the __hne half intrinsic is used;
// otherwise both operands are converted to float and compared.
HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hne(half(a), half(b));
#else
  return float(a) != float(b);
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline bool operator<(const float16& a, const float16& b) {
|
|
|
|
|
// Less-than comparison for float16.
// When __CUDA_ARCH__ is defined and >= 530 the __hlt half intrinsic is used;
// otherwise both operands are converted to float and compared.
HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hlt(half(a), half(b));
#else
  return float(a) < float(b);
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline bool operator<=(const float16& a, const float16& b) {
|
|
|
|
|
// Less-than-or-equal comparison for float16.
// When __CUDA_ARCH__ is defined and >= 530 the __hle half intrinsic is used;
// otherwise both operands are converted to float and compared.
HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hle(half(a), half(b));
#else
  return float(a) <= float(b);
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline bool operator>(const float16& a, const float16& b) {
|
|
|
|
|
// Greater-than comparison for float16.
// When __CUDA_ARCH__ is defined and >= 530 the __hgt half intrinsic is used;
// otherwise both operands are converted to float and compared.
HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hgt(half(a), half(b));
#else
  return float(a) > float(b);
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
DEVICE inline bool operator>=(const float16& a, const float16& b) {
|
|
|
|
|
// Greater-than-or-equal comparison for float16.
// When __CUDA_ARCH__ is defined and >= 530 the __hge half intrinsic is used;
// otherwise both operands are converted to float and compared.
HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hge(half(a), half(b));
#else
  return float(a) >= float(b);
#endif  // close the conditional before the function's closing brace
}
|
|
|
|
|
|
|
|
|
|
// Arithmetic operators for float16 on ARMv8.2-A CPU
|
|
|
|
@ -737,71 +772,71 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
|
|
|
|
|
return (res & 0xffff) != 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Arithmetic operators for float16, software emulated on other CPU/GPU
|
|
|
|
|
// Arithmetic operators for float16, software emulated on other CPU
|
|
|
|
|
#else
|
|
|
|
|
HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated addition: widen both operands to float, add, and
// convert the sum back to float16.
HOST inline float16 operator+(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return float16(lhs + rhs);
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 operator-(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated subtraction: widen both operands to float, subtract,
// and convert the difference back to float16.
HOST inline float16 operator-(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return float16(lhs - rhs);
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 operator*(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated multiplication: widen both operands to float, multiply,
// and convert the product back to float16.
HOST inline float16 operator*(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return float16(lhs * rhs);
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 operator/(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated division: widen both operands to float, divide, and
// convert the quotient back to float16.
HOST inline float16 operator/(const float16& a, const float16& b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  return float16(numerator / denominator);
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 operator-(const float16& a) {
|
|
|
|
|
// Software-emulated unary negation: returns a float16 whose raw member x is
// a.x with bit 0x8000 toggled.
HOST inline float16 operator-(const float16& a) {
  float16 negated;
  // Flip only the top bit of the 16-bit pattern; all other bits are kept.
  negated.x = a.x ^ 0x8000;
  return negated;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16& operator+=(float16& a, const float16& b) {
|
|
|
|
|
// In-place software-emulated addition: a receives float16(float(a) +
// float(b)); a reference to a is returned.
HOST inline float16& operator+=(float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  a = float16(lhs + rhs);
  return a;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16& operator-=(float16& a, const float16& b) {
|
|
|
|
|
// In-place software-emulated subtraction: a receives float16(float(a) -
// float(b)); a reference to a is returned.
HOST inline float16& operator-=(float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  a = float16(lhs - rhs);
  return a;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16& operator*=(float16& a, const float16& b) {
|
|
|
|
|
// In-place software-emulated multiplication: a receives float16(float(a) *
// float(b)); a reference to a is returned.
HOST inline float16& operator*=(float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  a = float16(lhs * rhs);
  return a;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16& operator/=(float16& a, const float16& b) {
|
|
|
|
|
// In-place software-emulated division: a receives float16(float(a) /
// float(b)); a reference to a is returned.
HOST inline float16& operator/=(float16& a, const float16& b) {
  const float numerator = static_cast<float>(a);
  const float denominator = static_cast<float>(b);
  a = float16(numerator / denominator);
  return a;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool operator==(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated equality: compares the float conversions of a and b.
HOST inline bool operator==(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs == rhs;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool operator!=(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated inequality: compares the float conversions of a and b.
HOST inline bool operator!=(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs != rhs;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool operator<(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated less-than: compares the float conversions of a and b.
HOST inline bool operator<(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs < rhs;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool operator<=(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated less-than-or-equal: compares the float conversions of
// a and b.
HOST inline bool operator<=(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs <= rhs;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool operator>(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated greater-than: compares the float conversions of a and b.
HOST inline bool operator>(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs > rhs;
}
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool operator>=(const float16& a, const float16& b) {
|
|
|
|
|
// Software-emulated greater-than-or-equal: compares the float conversions
// of a and b.
HOST inline bool operator>=(const float16& a, const float16& b) {
  const float lhs = static_cast<float>(a);
  const float rhs = static_cast<float>(b);
  return lhs >= rhs;
}
|
|
|
|
|
#endif
|
|
|
|
|