|
|
|
@ -110,11 +110,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
|
|
|
|
|
|
|
|
|
|
auto x_dims = framework::vectorize(input->dims());
|
|
|
|
|
auto f_dims = framework::vectorize(filter->dims());
|
|
|
|
|
if (activation == "identity") {
|
|
|
|
|
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
|
|
|
|
|
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
|
|
|
|
|
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
|
|
|
|
|
} else if (!exhaustive_search) {
|
|
|
|
|
if (!exhaustive_search) {
|
|
|
|
|
CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
|
|
|
|
|
handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
|
|
|
|
|
cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
|
|
|
|
@ -165,6 +161,29 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
|
|
|
|
|
PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
|
|
|
|
|
"workspace_size to be allocated exceeds the limit");
|
|
|
|
|
|
|
|
|
|
if ((activation == "identity") &&
|
|
|
|
|
(algo != CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) &&
|
|
|
|
|
(!residual)) {
|
|
|
|
|
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
|
|
|
|
|
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
|
|
|
|
|
// However, tests show that algo is slower in some cases, so fall back to
|
|
|
|
|
// cudnnConvolutionForward and cudnnAddTensor
|
|
|
|
|
// ------------- cudnn conv forward and bias add ---------------------
|
|
|
|
|
ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
|
|
|
|
|
auto cudnn_func = [&](void* cudnn_workspace) {
|
|
|
|
|
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward(
|
|
|
|
|
handle, &alpha, cudnn_input_desc, input_data, cudnn_filter_desc,
|
|
|
|
|
filter_data, cudnn_conv_desc, algo, cudnn_workspace,
|
|
|
|
|
workspace_size_in_bytes, &beta, cudnn_output_desc, output_data));
|
|
|
|
|
};
|
|
|
|
|
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
|
|
|
|
|
CUDNN_ENFORCE(platform::dynload::cudnnAddTensor(
|
|
|
|
|
handle, &alpha, cudnn_bias_desc, bias_data, &alpha, cudnn_output_desc,
|
|
|
|
|
output_data));
|
|
|
|
|
} else {
|
|
|
|
|
if (activation == "identity") {
|
|
|
|
|
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
|
|
|
|
|
}
|
|
|
|
|
// ------------------- cudnn conv+bias+act forward --------------------
|
|
|
|
|
ScalingParamType<T> alpha1 = 1.0f;
|
|
|
|
|
ScalingParamType<T> alpha2 = residual ? 1.0f : 0.0f;
|
|
|
|
@ -178,6 +197,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
|
|
|
|
|
};
|
|
|
|
|
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|