You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
234 lines
5.8 KiB
234 lines
5.8 KiB
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
|
|
|
|
#ifndef HL_BASE_H_
|
|
#define HL_BASE_H_
|
|
|
|
#include <cstddef>
|
|
|
|
/**
 * Floating-point precision used throughout HPPL.
 *
 * `real` is `double` when PADDLE_TYPE_DOUBLE is defined and `float`
 * otherwise. HL_FLOAT_MAX and HL_FLOAT_MIN are the largest finite value and
 * the smallest positive normalized value of `real`. Each literal has the
 * same type as `real`, so storing it in a `real` neither overflows nor
 * narrows (the double limits do not fit in a float).
 */
#ifdef PADDLE_TYPE_DOUBLE
#define HL_FLOAT_MAX 1.7976931348623157e+308
#define HL_FLOAT_MIN 2.2250738585072014e-308
using real = double;
#else
#define HL_FLOAT_MAX 3.40282347e+38F
#define HL_FLOAT_MIN 1.17549435e-38F
using real = float;
#endif
|
|
|
|
/**
 * The maximum input value for exp, used to avoid overflow problems.
 * Currently only used for the tanh function.
 *
 * The literal is a double constant.
 */
#define EXP_MAX_INPUT 40.0
|
|
|
|
/**
 * @brief DIVUP(x, y) is similar to ceil(x / y).
 *
 * Computed as ((x) + (y) - 1) / (y); with integer operands this is the
 * quotient rounded up for x >= 0 and y > 0.
 *
 * @note For CUDA, DIVUP will be used to specify
 *       the size of blockDim.
 * @note Both arguments are macro parameters: y is expanded twice, so avoid
 *       passing expressions with side effects.
 * @note Only defined here if no DIVUP macro exists already.
 */
#ifndef DIVUP
#define DIVUP(x, y) (((x) + (y)-1) / (y))
#endif
|
|
|
|
/**
 * HPPL is an internal high performance parallel computing library
 * for high-level neural network routines, which can support many
 * heterogeneous compute architectures, such as GPU, FPGA, etc.
 */
|
|
|
|
/**
 * @brief HPPL CUDA Stream.
 *
 * @note Each thread can use HPPL_STREAM_* after calling hl_init.
 *       HPPL_STREAM_DEFAULT is HPPL default stream.
 *
 * The enumerators are consecutive integers starting at 0; HPPL_STREAM_END
 * follows the last stream id and therefore equals the number of stream ids.
 */
typedef enum {
  HPPL_STREAM_DEFAULT = 0, /* Thread Default Stream */
  HPPL_STREAM_1 = 1,
  HPPL_STREAM_2 = 2,
  HPPL_STREAM_3 = 3,
  HPPL_STREAM_4 = 4,
  HPPL_THREAD_STREAM_1 = 5,
  HPPL_THREAD_STREAM_2 = 6,
  HPPL_THREAD_STREAM_3 = 7,
  HPPL_THREAD_STREAM_4 = 8,
  HPPL_STREAM_END /* one past the last stream id */
} hl_stream_t;
|
|
|
|
/**
 * @brief HPPL activation mode.
 *
 * The enumerators are consecutive integers starting at 0;
 * HL_ACTIVATION_END follows the last mode and equals the number of modes.
 */
typedef enum {
  HL_ACTIVATION_SIGMOID = 0,
  HL_ACTIVATION_RELU = 1,
  HL_ACTIVATION_TANH = 2,
  HL_ACTIVATION_LINEAR = 3,
  HL_ACTIVATION_END /* one past the last activation mode */
} hl_activation_mode_t;
|
|
|
|
/**
 * @brief Transpose type.
 *
 * The N / T suffixes follow the BLAS naming for matrix operations:
 * N means the operand is used as is, T means it is transposed.
 */
typedef enum {
  HPPL_OP_N = 0, /* non transpose */
  HPPL_OP_T = 1, /* transpose */
  HPPL_OP_END    /* one past the last operation */
} hl_trans_op_t;
|
|
|
|
/**
|
|
* @brief Lstm value.
|
|
*
|
|
* @param gateValue input value.
|
|
* @param prevStateValue previous state value.
|
|
* @param stateValue state value.
|
|
* @param stateActiveValue state active value.
|
|
* @param outputValue output value.
|
|
*/
|
|
typedef struct {
|
|
real *gateValue;
|
|
real *prevStateValue;
|
|
real *stateValue;
|
|
real *stateActiveValue;
|
|
real *outputValue;
|
|
real *checkIg;
|
|
real *checkFg;
|
|
real *checkOg;
|
|
} hl_lstm_value;
|
|
|
|
/**
|
|
* @brief Lstm gradient.
|
|
*
|
|
* @param gateGrad input gradient.
|
|
* @param prevStateGrad previous state gradient.
|
|
* @param stateGrad state gradient.
|
|
* @param stateActiveGrad state active gradient.
|
|
* @param outputGrad output gradient.
|
|
*/
|
|
typedef struct {
|
|
real *gateGrad;
|
|
real *prevStateGrad;
|
|
real *stateGrad;
|
|
real *stateActiveGrad;
|
|
real *outputGrad;
|
|
real *checkIgGrad;
|
|
real *checkFgGrad;
|
|
real *checkOgGrad;
|
|
} hl_lstm_grad;
|
|
|
|
/**
|
|
* @brief Gru value.
|
|
*
|
|
* @param gateWeight gate weight (updateGate + resetGate).
|
|
* @param stateWeight frame state weight.
|
|
* @param gateValue gate value results.
|
|
* @param resetOutputValue resetOutput value.
|
|
* @param outputValue output value.
|
|
* @param prevOutValue previous output value.
|
|
*
|
|
*/
|
|
typedef struct {
|
|
real *gateWeight;
|
|
real *stateWeight;
|
|
real *gateValue;
|
|
real *resetOutputValue;
|
|
real *outputValue;
|
|
real *prevOutValue;
|
|
} hl_gru_value;
|
|
|
|
/**
|
|
* @brief Gru gradient.
|
|
*
|
|
* @param gateWeightGrad gate weight gradient.
|
|
* @param stateWeightGrad frame state weight gradient.
|
|
* @param gateGrad gate gradient results.
|
|
* @param resetOutputGrad resetOutput gradient.
|
|
* @param outputGrad output gradient.
|
|
* @param prevOutGrad previous output gradient.
|
|
*/
|
|
typedef struct {
|
|
real *gateWeightGrad;
|
|
real *stateWeightGrad;
|
|
real *gateGrad;
|
|
real *resetOutputGrad;
|
|
real *outputGrad;
|
|
real *prevOutGrad;
|
|
} hl_gru_grad;
|
|
|
|
/**
 * @brief Sparse matrix value type.
 *
 * HL_VALUE_END follows the last value type and equals the number of types.
 */
typedef enum {
  HL_NO_VALUE = 0,    /* matrix values only 0 or 1 */
  HL_FLOAT_VALUE = 1, /* NOTE(review): presumably floating-point entries -
                         confirm */
  HL_VALUE_END        /* one past the last value type */
} hl_matrix_value_t;
|
|
|
|
/**
 * @brief HPPL matrix format.
 *
 * HL_SPARSE_END follows the last format and equals the number of formats.
 */
typedef enum {
  HL_SPARSE_CSR = 0,
  HL_SPARSE_CSC = 1,
  HL_SPARSE_END /* one past the last format */
} hl_matrix_format_t;
|
|
|
|
/* Handle to a matrix: a pointer to struct _hl_matrix_s, which is only
 * declared (not defined) at this point in the header. */
using hl_matrix_s = struct _hl_matrix_s *;
|
|
|
|
/**
|
|
* @brief HPPL sparse matrix.
|
|
*
|
|
* @param matrix sparse matrix.
|
|
* @param format matrix format.
|
|
* @param type the type of matrix values.
|
|
* @param rows matrix rows.
|
|
* @param cols matrix columns.
|
|
* @param nnz nonzero values of sparse matrix.
|
|
*/
|
|
typedef struct {
|
|
hl_matrix_s matrix;
|
|
hl_matrix_format_t format;
|
|
hl_matrix_value_t type;
|
|
int rows;
|
|
int cols;
|
|
size_t nnz;
|
|
} _hl_sparse_matrix_s, *hl_sparse_matrix_s;
|
|
|
|
#ifdef __NVCC__
|
|
|
|
#include "cuda_runtime.h"
|
|
#include "hl_cuda.h"
|
|
#include "paddle/utils/Logging.h"
|
|
|
|
extern __thread bool g_sync_flag;
|
|
extern __thread cudaStream_t default_stream;
|
|
#define STREAM_DEFAULT default_stream
|
|
|
|
/**
 * @brief Check cuda kernel execution.
 *
 * When the thread-local g_sync_flag is true, this synchronizes
 * HPPL_STREAM_DEFAULT, reads the last device error and passes it to
 * CHECK_EQ against cudaSuccess, streaming "[msg] CUDA error: <description>"
 * into the check message. When the flag is false the macro does nothing.
 *
 * The body is wrapped in do { ... } while (0) so that the expansion is
 * exactly one statement: write `CHECK_SYNC("...");` with the trailing
 * semicolon, and it is safe inside an unbraced if / else.
 *
 * @param msg error string
 */
#define CHECK_SYNC(msg)                                                   \
  do {                                                                    \
    if (g_sync_flag) {                                                    \
      hl_stream_synchronize(HPPL_STREAM_DEFAULT);                         \
      cudaError_t hl_sync_err_ = (cudaError_t)hl_get_device_last_error(); \
      CHECK_EQ(cudaSuccess, hl_sync_err_)                                 \
          << "[" << msg << "] "                                           \
          << "CUDA error: "                                               \
          << hl_get_device_error_string((size_t)hl_sync_err_);            \
    }                                                                     \
  } while (0)
|
|
|
|
#endif /* __NVCC__ */
|
|
|
|
#endif /* HL_BASE_H_ */
|