|
|
@ -221,10 +221,10 @@ inline cudnnTensorFormat_t GetCudnnTensorFormat(
|
|
|
|
class ScopedTensorDescriptor {
|
|
|
|
class ScopedTensorDescriptor {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ScopedTensorDescriptor() {
|
|
|
|
ScopedTensorDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateTensorDescriptor(&desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~ScopedTensorDescriptor() {
|
|
|
|
~ScopedTensorDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyTensorDescriptor(desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
|
|
|
|
inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
|
|
|
@ -243,7 +243,7 @@ class ScopedTensorDescriptor {
|
|
|
|
if (groups > 1) {
|
|
|
|
if (groups > 1) {
|
|
|
|
dims_with_group[1] = dims_with_group[1] / groups;
|
|
|
|
dims_with_group[1] = dims_with_group[1] / groups;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetTensorNdDescriptor(
|
|
|
|
desc_, type, dims_with_group.size(), dims_with_group.data(),
|
|
|
|
desc_, type, dims_with_group.size(), dims_with_group.data(),
|
|
|
|
strides.data()));
|
|
|
|
strides.data()));
|
|
|
|
return desc_;
|
|
|
|
return desc_;
|
|
|
@ -265,10 +265,10 @@ class ScopedTensorDescriptor {
|
|
|
|
class ScopedFilterDescriptor {
|
|
|
|
class ScopedFilterDescriptor {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ScopedFilterDescriptor() {
|
|
|
|
ScopedFilterDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnCreateFilterDescriptor(&desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateFilterDescriptor(&desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~ScopedFilterDescriptor() {
|
|
|
|
~ScopedFilterDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnDestroyFilterDescriptor(desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyFilterDescriptor(desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
|
|
|
|
inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
|
|
|
@ -284,7 +284,7 @@ class ScopedFilterDescriptor {
|
|
|
|
kernel_with_group[0] /= groups;
|
|
|
|
kernel_with_group[0] /= groups;
|
|
|
|
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
|
|
|
|
// NOTE: input filter(C) of the filter is already asserted to be C/groups.
|
|
|
|
}
|
|
|
|
}
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetFilterNdDescriptor(
|
|
|
|
desc_, type, format, kernel_with_group.size(),
|
|
|
|
desc_, type, format, kernel_with_group.size(),
|
|
|
|
kernel_with_group.data()));
|
|
|
|
kernel_with_group.data()));
|
|
|
|
return desc_;
|
|
|
|
return desc_;
|
|
|
@ -306,10 +306,12 @@ class ScopedFilterDescriptor {
|
|
|
|
class ScopedConvolutionDescriptor {
|
|
|
|
class ScopedConvolutionDescriptor {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ScopedConvolutionDescriptor() {
|
|
|
|
ScopedConvolutionDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnCreateConvolutionDescriptor(&desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(
|
|
|
|
|
|
|
|
dynload::cudnnCreateConvolutionDescriptor(&desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~ScopedConvolutionDescriptor() {
|
|
|
|
~ScopedConvolutionDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnDestroyConvolutionDescriptor(desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(
|
|
|
|
|
|
|
|
dynload::cudnnDestroyConvolutionDescriptor(desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline cudnnConvolutionDescriptor_t descriptor(
|
|
|
|
inline cudnnConvolutionDescriptor_t descriptor(
|
|
|
@ -332,7 +334,7 @@ class ScopedConvolutionDescriptor {
|
|
|
|
|
|
|
|
|
|
|
|
cudnnDataType_t compute_type =
|
|
|
|
cudnnDataType_t compute_type =
|
|
|
|
(type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
|
|
|
|
(type == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor(
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetConvolutionNdDescriptor(
|
|
|
|
desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
|
|
|
|
desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
|
|
|
|
CUDNN_CROSS_CORRELATION, compute_type));
|
|
|
|
CUDNN_CROSS_CORRELATION, compute_type));
|
|
|
|
return desc_;
|
|
|
|
return desc_;
|
|
|
@ -353,10 +355,10 @@ class ScopedConvolutionDescriptor {
|
|
|
|
class ScopedPoolingDescriptor {
|
|
|
|
class ScopedPoolingDescriptor {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ScopedPoolingDescriptor() {
|
|
|
|
ScopedPoolingDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnCreatePoolingDescriptor(&desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreatePoolingDescriptor(&desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~ScopedPoolingDescriptor() {
|
|
|
|
~ScopedPoolingDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnDestroyPoolingDescriptor(desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyPoolingDescriptor(desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
|
|
|
|
inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
|
|
|
@ -365,7 +367,7 @@ class ScopedPoolingDescriptor {
|
|
|
|
const std::vector<int>& strides) {
|
|
|
|
const std::vector<int>& strides) {
|
|
|
|
PADDLE_ENFORCE_EQ(kernel.size(), pads.size());
|
|
|
|
PADDLE_ENFORCE_EQ(kernel.size(), pads.size());
|
|
|
|
PADDLE_ENFORCE_EQ(kernel.size(), strides.size());
|
|
|
|
PADDLE_ENFORCE_EQ(kernel.size(), strides.size());
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnSetPoolingNdDescriptor(
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetPoolingNdDescriptor(
|
|
|
|
desc_, (GetPoolingMode(mode)),
|
|
|
|
desc_, (GetPoolingMode(mode)),
|
|
|
|
CUDNN_PROPAGATE_NAN, // Always propagate nans.
|
|
|
|
CUDNN_PROPAGATE_NAN, // Always propagate nans.
|
|
|
|
kernel.size(), kernel.data(), pads.data(), strides.data()));
|
|
|
|
kernel.size(), kernel.data(), pads.data(), strides.data()));
|
|
|
@ -380,16 +382,18 @@ class ScopedPoolingDescriptor {
|
|
|
|
class ScopedSpatialTransformerDescriptor {
|
|
|
|
class ScopedSpatialTransformerDescriptor {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ScopedSpatialTransformerDescriptor() {
|
|
|
|
ScopedSpatialTransformerDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(
|
|
|
|
|
|
|
|
dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~ScopedSpatialTransformerDescriptor() {
|
|
|
|
~ScopedSpatialTransformerDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(
|
|
|
|
|
|
|
|
dynload::cudnnDestroySpatialTransformerDescriptor(desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
template <typename T>
|
|
|
|
inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
|
|
|
|
inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
|
|
|
|
const int dimA[]) {
|
|
|
|
const int dimA[]) {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnSetSpatialTransformerNdDescriptor(
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnSetSpatialTransformerNdDescriptor(
|
|
|
|
desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
|
|
|
|
desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
|
|
|
|
return desc_;
|
|
|
|
return desc_;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -402,10 +406,12 @@ class ScopedSpatialTransformerDescriptor {
|
|
|
|
class ScopedActivationDescriptor {
|
|
|
|
class ScopedActivationDescriptor {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ScopedActivationDescriptor() {
|
|
|
|
ScopedActivationDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(
|
|
|
|
|
|
|
|
dynload::cudnnCreateActivationDescriptor(&desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~ScopedActivationDescriptor() {
|
|
|
|
~ScopedActivationDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(
|
|
|
|
|
|
|
|
dynload::cudnnDestroyActivationDescriptor(desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
template <typename T>
|
|
|
@ -467,15 +473,15 @@ inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
|
|
|
|
class ScopedCTCLossDescriptor {
|
|
|
|
class ScopedCTCLossDescriptor {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ScopedCTCLossDescriptor() {
|
|
|
|
ScopedCTCLossDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnCreateCTCLossDescriptor(&desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnCreateCTCLossDescriptor(&desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
~ScopedCTCLossDescriptor() {
|
|
|
|
~ScopedCTCLossDescriptor() {
|
|
|
|
PADDLE_ENFORCE(dynload::cudnnDestroyCTCLossDescriptor(desc_));
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(dynload::cudnnDestroyCTCLossDescriptor(desc_));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
template <typename T>
|
|
|
|
inline cudnnCTCLossDescriptor_t descriptor() {
|
|
|
|
inline cudnnCTCLossDescriptor_t descriptor() {
|
|
|
|
PADDLE_ENFORCE(
|
|
|
|
PADDLE_ENFORCE_CUDA_SUCCESS(
|
|
|
|
dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
|
|
|
|
dynload::cudnnSetCTCLossDescriptor(desc_, CudnnDataType<T>::type));
|
|
|
|
return desc_;
|
|
|
|
return desc_;
|
|
|
|
}
|
|
|
|
}
|
|
|
|