Remove the redundant HPPL_TYPE_DOUBLE macro; use PADDLE_TYPE_DOUBLE everywhere instead (#200)

avx_docs
gangliao 9 years ago committed by Yu Yang
parent 91df606280
commit c13bdb15cd

@ -104,7 +104,7 @@ else()
endif(NOT WITH_GPU) endif(NOT WITH_GPU)
if(WITH_DOUBLE) if(WITH_DOUBLE)
add_definitions(-DPADDLE_TYPE_DOUBLE -DHPPL_TYPE_DOUBLE) add_definitions(-DPADDLE_TYPE_DOUBLE)
set(ACCURACY double) set(ACCURACY double)
else(WITH_DOUBLE) else(WITH_DOUBLE)
set(ACCURACY float) set(ACCURACY float)

@ -185,7 +185,7 @@ typedef struct {
size_t nnz; size_t nnz;
} _hl_sparse_matrix_s, *hl_sparse_matrix_s; } _hl_sparse_matrix_s, *hl_sparse_matrix_s;
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
/** /**
* HPPL data type: real (float or double) * HPPL data type: real (float or double)
* *

@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/math/MathFunctions.h" #include "paddle/math/MathFunctions.h"
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
#define CBLAS_GEMM paddle::gemm<float> #define CBLAS_GEMM paddle::gemm<float>
#else #else
#define CBLAS_GEMM paddle::gemm<double> #define CBLAS_GEMM paddle::gemm<double>

@ -28,7 +28,7 @@ namespace hppl {
const real min = SIGMOID_THRESHOLD_MIN; const real min = SIGMOID_THRESHOLD_MIN;
const real max = SIGMOID_THRESHOLD_MAX; const real max = SIGMOID_THRESHOLD_MAX;
real tmp = (a < min) ? min : ((a > max) ? max : a); real tmp = (a < min) ? min : ((a > max) ? max : a);
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
return __fdividef(1.0f, 1.0f + __expf(-tmp)); return __fdividef(1.0f, 1.0f + __expf(-tmp));
#else #else
return 1.0 / (1.0 + exp(-tmp)); return 1.0 / (1.0 + exp(-tmp));
@ -36,7 +36,7 @@ namespace hppl {
} }
__device__ static real tanh(const real a) { __device__ static real tanh(const real a) {
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
return __fdividef(2.0f, (1.0f + __expf(-2.0f*a))) - 1.0f; return __fdividef(2.0f, (1.0f + __expf(-2.0f*a))) - 1.0f;
#else #else
return (2.0 / (1.0 + exp(-2.0*a))) - 1.0; return (2.0 / (1.0 + exp(-2.0*a))) - 1.0;

@ -30,7 +30,7 @@ limitations under the License. */
#define INLINE inline #define INLINE inline
#endif #endif
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
#define DEVICE_FMAX fmaxf #define DEVICE_FMAX fmaxf
#define DEVICE_FMIN fminf #define DEVICE_FMIN fminf
#else #else

@ -21,7 +21,7 @@ limitations under the License. */
#ifdef __CUDA_ARCH__ #ifdef __CUDA_ARCH__
// typedef void* vecType; // typedef void* vecType;
#include <vector_types.h> #include <vector_types.h>
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
typedef float4 vecType; typedef float4 vecType;
#else #else
typedef double2 vecType; typedef double2 vecType;
@ -30,7 +30,7 @@ typedef double2 vecType;
#include <mmintrin.h> #include <mmintrin.h>
#include <xmmintrin.h> #include <xmmintrin.h>
#include <emmintrin.h> #include <emmintrin.h>
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
typedef __m128 vecType; typedef __m128 vecType;
#else #else
typedef __m128d vecType; typedef __m128d vecType;

@ -20,7 +20,7 @@ limitations under the License. */
#define VECTOR_SIZE 16 #define VECTOR_SIZE 16
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
/* number of float in vector */ /* number of float in vector */
#define VECTOR_LEN 4 #define VECTOR_LEN 4
#define VECTOR_SET _mm_set_ps1 #define VECTOR_SET _mm_set_ps1
@ -41,7 +41,7 @@ inline bool hl_check_align(void *ptr) {
return hl_check_align(reinterpret_cast<size_t>(ptr)); return hl_check_align(reinterpret_cast<size_t>(ptr));
} }
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
template <class Agg> template <class Agg>
inline real hl_agg_op(Agg agg, vecType mm) { inline real hl_agg_op(Agg agg, vecType mm) {
__m128 lo = _mm_unpacklo_ps(mm, mm); __m128 lo = _mm_unpacklo_ps(mm, mm);

@ -84,7 +84,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
} /* namespace dynload */ } /* namespace dynload */
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
#define CUBLAS_GEAM dynload::cublasSgeam #define CUBLAS_GEAM dynload::cublasSgeam
#define CUBLAS_GEMV dynload::cublasSgemv #define CUBLAS_GEMV dynload::cublasSgemv
#define CUBLAS_GEMM dynload::cublasSgemm #define CUBLAS_GEMM dynload::cublasSgemm

@ -340,7 +340,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc,
(cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor)); (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
CHECK_NOTNULL(hl_desc); CHECK_NOTNULL(hl_desc);
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t data_type = CUDNN_DATA_FLOAT; cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else #else
cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@ -373,7 +373,7 @@ void hl_create_tensor_descriptor(hl_tensor_descriptor* image_desc) {
(cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor)); (cudnn_tensor_descriptor)malloc(sizeof(_cudnn_tensor_descriptor));
CHECK_NOTNULL(hl_desc); CHECK_NOTNULL(hl_desc);
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t data_type = CUDNN_DATA_FLOAT; cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else #else
cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@ -611,7 +611,7 @@ void hl_create_filter_descriptor(hl_filter_descriptor* filter,
CHECK_CUDNN(dynload::cudnnCreateFilterDescriptor(&hl_filter->desc)); CHECK_CUDNN(dynload::cudnnCreateFilterDescriptor(&hl_filter->desc));
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t data_type = CUDNN_DATA_FLOAT; cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else #else
cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@ -921,7 +921,7 @@ void hl_softmax_forward(real *input,
int height, int height,
int width) int width)
{ {
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t data_type = CUDNN_DATA_FLOAT; cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else #else
cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;
@ -955,7 +955,7 @@ void hl_softmax_backward(real *output_value,
int height, int height,
int width) int width)
{ {
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
cudnnDataType_t data_type = CUDNN_DATA_FLOAT; cudnnDataType_t data_type = CUDNN_DATA_FLOAT;
#else #else
cudnnDataType_t data_type = CUDNN_DATA_DOUBLE; cudnnDataType_t data_type = CUDNN_DATA_DOUBLE;

@ -626,7 +626,7 @@ void hl_specify_devices_start(int* device, int number) {
void hl_rand(real *dest_d, size_t num) { void hl_rand(real *dest_d, size_t num) {
pthread_mutex_lock(t_resource.gen_mutex); pthread_mutex_lock(t_resource.gen_mutex);
CHECK_EQ( CHECK_EQ(
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
dynload::curandGenerateUniform(t_resource.gen, dest_d, num), dynload::curandGenerateUniform(t_resource.gen, dest_d, num),
#else #else
dynload::curandGenerateUniformDouble(t_resource.gen, dest_d, num), dynload::curandGenerateUniformDouble(t_resource.gen, dest_d, num),

@ -47,7 +47,7 @@ void hl_matrix_add(real *A_d,
CHECK_SYNC("hl_matrix_add failed"); CHECK_SYNC("hl_matrix_add failed");
} }
#ifdef HPPL_TYPE_DOUBLE #ifdef PADDLE_TYPE_DOUBLE
#define THRESHOLD 128 #define THRESHOLD 128
#else #else
#define THRESHOLD 64 #define THRESHOLD 64
@ -102,7 +102,7 @@ void subMaxAndExp(real* I,
val = -THRESHOLD; val = -THRESHOLD;
} }
I[nextIdx] = val; I[nextIdx] = val;
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
O[nextIdx] = __expf(val); O[nextIdx] = __expf(val);
#else #else
O[nextIdx] = exp(val); O[nextIdx] = exp(val);

@ -355,7 +355,7 @@ __global__ void KeSMatrixCscMulDense(real *C_d,
} }
/* best perf */ /* best perf */
#ifndef HPPL_TYPE_DOUBLE #ifndef PADDLE_TYPE_DOUBLE
#define CU_CSCMM_THREAD_M_BEST 9 #define CU_CSCMM_THREAD_M_BEST 9
#else #else
#define CU_CSCMM_THREAD_M_BEST 4 #define CU_CSCMM_THREAD_M_BEST 4

Loading…
Cancel
Save