You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
135 lines
5.6 KiB
135 lines
5.6 KiB
8 years ago
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License. */
|
||
|
|
||
|
#pragma once
|
||
|
|
||
8 years ago
|
#include <cudnn.h>
#include <dlfcn.h>
#include <mutex>
#include "paddle/platform/dynamic_loader.h"
|
||
|
|
||
|
namespace paddle {
|
||
8 years ago
|
namespace platform {
|
||
8 years ago
|
namespace dynload {
|
||
8 years ago
|
|
||
|
// Guard handed to std::call_once by the PADDLE_USE_DSO flavour of
// DYNAMIC_LOAD_CUDNN_WRAP so that GetCudnnDsoHandle runs only once.
// NOTE(review): this variable and cudnn_dso_handle are defined (not merely
// declared) in a header; including the header from more than one translation
// unit would produce duplicate definitions -- confirm it is used from one TU.
std::once_flag cudnn_dso_flag;

// Library handle later passed to dlsym. Starts out null; its address is given
// to GetCudnnDsoHandle, which presumably stores the loaded handle here.
void* cudnn_dso_handle{nullptr};
|
||
|
|
||
|
#ifdef PADDLE_USE_DSO
|
||
|
|
||
8 years ago
|
/*
 * DYNAMIC_LOAD_CUDNN_WRAP(name) -- PADDLE_USE_DSO flavour.
 *
 * Defines struct DynLoad__<name> plus an object called <name> in the current
 * namespace. Calling that object:
 *   1. runs GetCudnnDsoHandle(&cudnn_dso_handle) once via std::call_once,
 *   2. resolves the symbol "<name>" in that handle with dlsym,
 *   3. forwards the arguments (taken by value) to the resolved function and
 *      returns its result.
 * The return type and function-pointer type are derived from the cuDNN
 * declaration of <name>, which must therefore be visible at expansion time.
 *
 * The dlsym result is kept in a function-local static, so the lookup happens
 * once per template instantiation instead of on every call.
 * NOTE: the dlsym result is not checked; a missing symbol results in a call
 * through a null pointer.
 */
#define DYNAMIC_LOAD_CUDNN_WRAP(__name)                             \
  struct DynLoad__##__name {                                        \
    template <typename... Args>                                     \
    auto operator()(Args... args) -> decltype(__name(args...)) {    \
      using cudnn_func = decltype(__name(args...)) (*)(Args...);    \
      std::call_once(cudnn_dso_flag,                                \
                     paddle::platform::dynload::GetCudnnDsoHandle,  \
                     &cudnn_dso_handle);                            \
      /* Initialised on first call only; the handle is fixed by */  \
      /* call_once above, so the address cannot change later.   */  \
      static void* p_##__name = dlsym(cudnn_dso_handle, #__name);   \
      return reinterpret_cast<cudnn_func>(p_##__name)(args...);     \
    }                                                               \
  } __name; /* struct DynLoad__##__name */
|
||
|
|
||
|
#else
|
||
|
|
||
|
/*
 * DYNAMIC_LOAD_CUDNN_WRAP(fn) -- flavour used when PADDLE_USE_DSO is not
 * defined.
 *
 * Defines struct DynLoad__<fn> plus an object called <fn> in the current
 * namespace. Calling that object simply invokes the cuDNN function of the
 * same name with the given arguments (taken by value) and returns its result;
 * no library loading or symbol lookup is performed here.
 */
#define DYNAMIC_LOAD_CUDNN_WRAP(cudnn_fn_)                                 \
  struct DynLoad__##cudnn_fn_ {                                            \
    template <typename... Params>                                          \
    auto operator()(Params... params) -> decltype(cudnn_fn_(params...)) {  \
      return cudnn_fn_(params...);                                         \
    }                                                                      \
  } cudnn_fn_; /* struct DynLoad__##cudnn_fn_ */
|
||
|
|
||
|
#endif
|
||
|
|
||
|
/**
 * Include all cuDNN functions needed in HPPL.
 * Different cuDNN versions have different interfaces.
 **/
|
||
|
// clang-format off
|
||
|
#define CUDNN_DNN_ROUTINE_EACH(__macro) \
|
||
|
__macro(cudnnSetTensor4dDescriptor) \
|
||
|
__macro(cudnnSetTensor4dDescriptorEx) \
|
||
|
__macro(cudnnGetConvolutionNdForwardOutputDim) \
|
||
|
__macro(cudnnGetConvolutionForwardAlgorithm) \
|
||
|
__macro(cudnnCreateTensorDescriptor) \
|
||
|
__macro(cudnnDestroyTensorDescriptor) \
|
||
|
__macro(cudnnCreateFilterDescriptor) \
|
||
|
__macro(cudnnSetFilter4dDescriptor) \
|
||
|
__macro(cudnnSetPooling2dDescriptor) \
|
||
|
__macro(cudnnDestroyFilterDescriptor) \
|
||
|
__macro(cudnnCreateConvolutionDescriptor) \
|
||
|
__macro(cudnnCreatePoolingDescriptor) \
|
||
|
__macro(cudnnDestroyPoolingDescriptor) \
|
||
|
__macro(cudnnSetConvolution2dDescriptor) \
|
||
|
__macro(cudnnDestroyConvolutionDescriptor) \
|
||
|
__macro(cudnnCreate) \
|
||
|
__macro(cudnnDestroy) \
|
||
|
__macro(cudnnSetStream) \
|
||
|
__macro(cudnnActivationForward) \
|
||
|
__macro(cudnnConvolutionForward) \
|
||
|
__macro(cudnnConvolutionBackwardBias) \
|
||
|
__macro(cudnnGetConvolutionForwardWorkspaceSize) \
|
||
|
__macro(cudnnTransformTensor) \
|
||
|
__macro(cudnnPoolingForward) \
|
||
|
__macro(cudnnPoolingBackward) \
|
||
|
__macro(cudnnSoftmaxBackward) \
|
||
|
__macro(cudnnSoftmaxForward) \
|
||
|
__macro(cudnnGetVersion) \
|
||
|
__macro(cudnnGetErrorString)
|
||
|
CUDNN_DNN_ROUTINE_EACH(DYNAMIC_LOAD_CUDNN_WRAP)
|
||
|
|
||
|
#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \
|
||
|
__macro(cudnnAddTensor) \
|
||
|
__macro(cudnnConvolutionBackwardData) \
|
||
|
__macro(cudnnConvolutionBackwardFilter)
|
||
|
CUDNN_DNN_ROUTINE_EACH_R2(DYNAMIC_LOAD_CUDNN_WRAP)
|
||
|
|
||
|
// Routines wrapped only when CUDNN_VERSION >= 3000 (cuDNN R3 and newer).
// The list macro is local to this section and removed again after use.
#if CUDNN_VERSION >= 3000
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(wrap)          \
  /* backward-data queries */                          \
  wrap(cudnnGetConvolutionBackwardDataAlgorithm)       \
  wrap(cudnnGetConvolutionBackwardDataWorkspaceSize)   \
  /* backward-filter queries */                        \
  wrap(cudnnGetConvolutionBackwardFilterAlgorithm)     \
  wrap(cudnnGetConvolutionBackwardFilterWorkspaceSize)
CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DYNAMIC_LOAD_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R3
#endif
|
||
|
|
||
|
|
||
|
// Batch-normalization routines, wrapped only when CUDNN_VERSION >= 4007
// (cuDNN R4 and newer). The list macro is removed again after use.
#if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(wrap)     \
  /* forward passes */                            \
  wrap(cudnnBatchNormalizationForwardTraining)    \
  wrap(cudnnBatchNormalizationForwardInference)   \
  /* backward pass */                             \
  wrap(cudnnBatchNormalizationBackward)
CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DYNAMIC_LOAD_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_AFTER_R4
#endif
|
||
|
|
||
|
// Activation-descriptor routines, wrapped only when CUDNN_VERSION >= 5000
// (cuDNN R5 and newer). The list macro is removed again after use.
#if CUDNN_VERSION >= 5000
#define CUDNN_DNN_ROUTINE_EACH_R5(wrap)     \
  /* descriptor lifetime */                 \
  wrap(cudnnCreateActivationDescriptor)     \
  wrap(cudnnDestroyActivationDescriptor)    \
  /* descriptor accessors */                \
  wrap(cudnnSetActivationDescriptor)        \
  wrap(cudnnGetActivationDescriptor)
CUDNN_DNN_ROUTINE_EACH_R5(DYNAMIC_LOAD_CUDNN_WRAP)
#undef CUDNN_DNN_ROUTINE_EACH_R5
#endif
|
||
|
|
||
|
#undef CUDNN_DNN_ROUTINE_EACH
|
||
|
// clang-format on
|
||
8 years ago
|
} // namespace dynload
|
||
8 years ago
|
} // namespace platform
|
||
8 years ago
|
} // namespace paddle
|