|
|
@ -15,6 +15,9 @@ limitations under the License. */
|
|
|
|
#pragma once
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdint.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <cmath>
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <limits>
|
|
|
|
#include <limits>
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
@ -25,18 +28,6 @@ limitations under the License. */
|
|
|
|
#include <hip/hip_runtime.h>
|
|
|
|
#include <hip/hip_runtime.h>
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __GNUC__
|
|
|
|
|
|
|
|
#define PADDLE_GNUC_VER (__GNUC__ * 10 + __GNUC_MINOR__)
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
#define PADDLE_GNUC_VER 0
|
|
|
|
|
|
|
|
#endif // __GNUC__
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef __clang__
|
|
|
|
|
|
|
|
#define PADDLE_CLANG_VER (__clang_major__ * 10 + __clang_minor__)
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
#define PADDLE_CLANG_VER 0
|
|
|
|
|
|
|
|
#endif // __clang__
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if defined(__CUDACC__) && CUDA_VERSION >= 7050
|
|
|
|
#if defined(__CUDACC__) && CUDA_VERSION >= 7050
|
|
|
|
#define PADDLE_CUDA_FP16
|
|
|
|
#define PADDLE_CUDA_FP16
|
|
|
|
#include <cuda_fp16.h>
|
|
|
|
#include <cuda_fp16.h>
|
|
|
@ -55,17 +46,15 @@ limitations under the License. */
|
|
|
|
|
|
|
|
|
|
|
|
#define CUDA_ARCH_FP16_SUPPORTED(CUDA_ARCH) (CUDA_ARCH >= 600)
|
|
|
|
#define CUDA_ARCH_FP16_SUPPORTED(CUDA_ARCH) (CUDA_ARCH >= 600)
|
|
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
#if (defined(__CUDACC__) || defined(__HIPCC__))
|
|
|
|
namespace platform {
|
|
|
|
#define HOSTDEVICE __host__ __device__
|
|
|
|
|
|
|
|
#define DEVICE __device__
|
|
|
|
// Forward declare float16 for eigen.h
|
|
|
|
#define HOST __host__
|
|
|
|
struct float16;
|
|
|
|
#else
|
|
|
|
|
|
|
|
#define HOSTDEVICE
|
|
|
|
} // namespace platform
|
|
|
|
#define DEVICE
|
|
|
|
} // namespace paddle
|
|
|
|
#define HOST
|
|
|
|
|
|
|
|
#endif
|
|
|
|
#include "paddle/fluid/platform/hostdevice.h"
|
|
|
|
|
|
|
|
#include "unsupported/Eigen/CXX11/Tensor"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
namespace paddle {
|
|
|
|
namespace platform {
|
|
|
|
namespace platform {
|
|
|
@ -73,7 +62,7 @@ namespace platform {
|
|
|
|
// Use PADDLE_ALIGNED(2) to ensure that each float16 will be allocated
|
|
|
|
// Use PADDLE_ALIGNED(2) to ensure that each float16 will be allocated
|
|
|
|
// and aligned at least on a 2-byte boundary, which leads to efficient
|
|
|
|
// and aligned at least on a 2-byte boundary, which leads to efficient
|
|
|
|
// memory access of float16 struct and also makes float16 compatible
|
|
|
|
// memory access of float16 struct and also makes float16 compatible
|
|
|
|
// with CUDA half, ARM float16_t, and Eigen::half data types.
|
|
|
|
// with CUDA half, ARM float16_t data types.
|
|
|
|
struct PADDLE_ALIGN(2) float16 {
|
|
|
|
struct PADDLE_ALIGN(2) float16 {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
uint16_t x;
|
|
|
|
uint16_t x;
|
|
|
@ -100,8 +89,6 @@ struct PADDLE_ALIGN(2) float16 {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif // PADDLE_CUDA_FP16
|
|
|
|
#endif // PADDLE_CUDA_FP16
|
|
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline explicit float16(const Eigen::half& h) : x(h.x) {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_NATIVE_FP16
|
|
|
|
#ifdef PADDLE_WITH_NATIVE_FP16
|
|
|
|
// __fp16 is a native half precision data type for arm cpu,
|
|
|
|
// __fp16 is a native half precision data type for arm cpu,
|
|
|
|
// float16_t is an alias for __fp16
|
|
|
|
// float16_t is an alias for __fp16
|
|
|
@ -163,11 +150,6 @@ struct PADDLE_ALIGN(2) float16 {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16& operator=(const Eigen::half& rhs) {
|
|
|
|
|
|
|
|
x = rhs.x;
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_NATIVE_FP16
|
|
|
|
#ifdef PADDLE_WITH_NATIVE_FP16
|
|
|
|
HOSTDEVICE inline float16& operator=(const float16_t& rhs) {
|
|
|
|
HOSTDEVICE inline float16& operator=(const float16_t& rhs) {
|
|
|
|
x = *reinterpret_cast<const uint16_t*>(&rhs);
|
|
|
|
x = *reinterpret_cast<const uint16_t*>(&rhs);
|
|
|
@ -245,12 +227,6 @@ struct PADDLE_ALIGN(2) float16 {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif // PADDLE_CUDA_FP16
|
|
|
|
#endif // PADDLE_CUDA_FP16
|
|
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE inline explicit operator Eigen::half() const {
|
|
|
|
|
|
|
|
Eigen::half h;
|
|
|
|
|
|
|
|
h.x = x;
|
|
|
|
|
|
|
|
return h;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_NATIVE_FP16
|
|
|
|
#ifdef PADDLE_WITH_NATIVE_FP16
|
|
|
|
HOSTDEVICE inline explicit operator float16_t() const {
|
|
|
|
HOSTDEVICE inline explicit operator float16_t() const {
|
|
|
|
return *reinterpret_cast<const float16_t*>(this);
|
|
|
|
return *reinterpret_cast<const float16_t*>(this);
|
|
|
@ -1108,105 +1084,3 @@ HOSTDEVICE inline paddle::platform::float16 abs(
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace std
|
|
|
|
} // namespace std
|
|
|
|
|
|
|
|
|
|
|
|
namespace Eigen {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
using float16 = paddle::platform::float16;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
struct NumTraits<float16> : GenericNumTraits<float16> {
|
|
|
|
|
|
|
|
enum {
|
|
|
|
|
|
|
|
IsSigned = true,
|
|
|
|
|
|
|
|
IsInteger = false,
|
|
|
|
|
|
|
|
IsComplex = false,
|
|
|
|
|
|
|
|
RequireInitialization = false
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HOSTDEVICE static inline float16 epsilon() {
|
|
|
|
|
|
|
|
return paddle::platform::raw_uint16_to_float16(0x0800);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
HOSTDEVICE static inline float16 dummy_precision() { return float16(1e-2f); }
|
|
|
|
|
|
|
|
HOSTDEVICE static inline float16 highest() {
|
|
|
|
|
|
|
|
return paddle::platform::raw_uint16_to_float16(0x7bff);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
HOSTDEVICE static inline float16 lowest() {
|
|
|
|
|
|
|
|
return paddle::platform::raw_uint16_to_float16(0xfbff);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
HOSTDEVICE static inline float16 infinity() {
|
|
|
|
|
|
|
|
return paddle::platform::raw_uint16_to_float16(0x7c00);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
HOSTDEVICE static inline float16 quiet_NaN() {
|
|
|
|
|
|
|
|
return paddle::platform::raw_uint16_to_float16(0x7c01);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace numext {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool(isnan)(const float16& a) {
|
|
|
|
|
|
|
|
return (paddle::platform::isnan)(a);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool(isinf)(const float16& a) {
|
|
|
|
|
|
|
|
return (paddle::platform::isinf)(a);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline bool(isfinite)(const float16& a) {
|
|
|
|
|
|
|
|
return (paddle::platform::isfinite)(a);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 exp(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::expf(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 erf(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::erff(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 log(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::logf(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 tanh(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::tanhf(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 sqrt(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::sqrtf(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 ceil(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::ceilf(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 floor(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::floorf(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 round(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::roundf(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 pow(const float16& a, const float16& b) {
|
|
|
|
|
|
|
|
return float16(::powf(static_cast<float>(a), static_cast<float>(b)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
|
|
|
HOSTDEVICE inline float16 abs(const float16& a) {
|
|
|
|
|
|
|
|
return float16(::fabs(static_cast<float>(a)));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace numext
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace Eigen
|
|
|
|
|
|
|
|