gpu support cuda-11.0 and cudnn-8.0

pull/13417/head
VectorSL 4 years ago
parent a1621dbb77
commit 8fd1bf6f62

@ -357,15 +357,11 @@ checkopts()
if [[ "X$DEVICE_VERSION" == "X" ]]; then if [[ "X$DEVICE_VERSION" == "X" ]]; then
DEVICE_VERSION=10.1 DEVICE_VERSION=10.1
fi fi
if [[ "X$DEVICE_VERSION" != "X9.2" && "X$DEVICE_VERSION" != "X10.1" ]]; then if [[ "X$DEVICE_VERSION" != "X11.1" && "X$DEVICE_VERSION" != "X10.1" ]]; then
echo "Invalid value ${DEVICE_VERSION} for option -V" echo "Invalid value ${DEVICE_VERSION} for option -V"
usage usage
exit 1 exit 1
fi fi
if [[ "X$DEVICE_VERSION" == "X9.2" ]]; then
echo "Unsupported CUDA version 9.2"
exit 1
fi
CUDA_VERSION="$DEVICE_VERSION" CUDA_VERSION="$DEVICE_VERSION"
elif [[ "X$DEVICE" == "Xd" || "X$DEVICE" == "Xascend" ]]; then elif [[ "X$DEVICE" == "Xd" || "X$DEVICE" == "Xascend" ]]; then
# version default 910 # version default 910

@ -45,7 +45,7 @@ class GPUEnvChecker(EnvChecker):
"""GPU environment check.""" """GPU environment check."""
def __init__(self): def __init__(self):
self.version = ["10.1"] self.version = ["10.1", "11.1"]
self.lib_key_to_lib_name = {'libcu': 'libcuda.so'} self.lib_key_to_lib_name = {'libcu': 'libcuda.so'}
# env # env
self.path = os.getenv("PATH") self.path = os.getenv("PATH")

@ -127,7 +127,11 @@ if(ENABLE_GPU)
endif() endif()
set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set(NVCC_TMP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") if(${CUDA_VERSION} VERSION_LESS 11.0)
string(REPLACE "-std=c++17" "-std=c++11" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
else()
string(REPLACE "-std=c++17" "-std=c++14" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) set_property(SOURCE ${GPU_SRC_LIST} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST}) cuda_add_library(gpu_cuda_lib STATIC ${GPU_SRC_LIST})
set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS}) set(CMAKE_CXX_FLAGS ${NVCC_TMP_CMAKE_CXX_FLAGS})

@ -310,23 +310,15 @@ class Conv2dGpuFwdKernel : public GpuKernel {
"cudnnSetTensor4dDescriptor failed"); "cudnnSetTensor4dDescriptor failed");
} }
void SelectAlgorithm(cudnnTensorDescriptor_t input_descriptor_real) { void SelectAlgorithm(cudnnTensorDescriptor_t input_descriptor_real) {
if (group_ > 1 || CUDNN_MAJOR < 7) { constexpr int requested_algo_count = 1;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, int returned_algo_count = 0;
cudnnGetConvolutionForwardAlgorithm( cudnnConvolutionFwdAlgoPerf_t perf_results;
cudnn_handle_, input_descriptor_real, filter_desc_, conv_desc_, output_desc_, CHECK_CUDNN_RET_WITH_EXCEPT(
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, 0, &conv_algorithm_), kernel_node_,
"cudnnGetConvolutionForwardAlgorithm failed"); cudnnGetConvolutionForwardAlgorithm_v7(cudnn_handle_, input_descriptor_real, filter_desc_, conv_desc_,
} else { output_desc_, requested_algo_count, &returned_algo_count, &perf_results),
constexpr int requested_algo_count = 1; "cudnnGetConvolutionForwardAlgorithm_v7 failed");
int returned_algo_count; conv_algorithm_ = perf_results.algo;
cudnnConvolutionFwdAlgoPerf_t perf_results;
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnGetConvolutionForwardAlgorithm_v7(cudnn_handle_, input_descriptor_real, filter_desc_, conv_desc_,
output_desc_, requested_algo_count, &returned_algo_count, &perf_results),
"cudnnGetConvolutionForwardAlgorithm_v7 failed");
conv_algorithm_ = perf_results.algo;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) { if (cudnn_data_type_ == CUDNN_DATA_HALF) {
conv_algorithm_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; conv_algorithm_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
} }

@ -280,23 +280,15 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
return true; return true;
} }
void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) { void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
if (group_ > 1 || CUDNN_MAJOR < 7) { constexpr int requested_algo_count = 1;
CHECK_CUDNN_RET_WITH_EXCEPT( int returned_algo_count = 0;
kernel_node_, cudnnConvolutionBwdFilterAlgoPerf_t perf_results;
cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle_, x_desc_real, dy_desc_, conv_desc_, dw_desc_, CHECK_CUDNN_RET_WITH_EXCEPT(
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, 0, &algo_), kernel_node_,
"GetConvolutionBackwardFilterAlgorithm failed"); cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnn_handle_, x_desc_real, dy_desc_, conv_desc_, dw_desc_,
} else { requested_algo_count, &returned_algo_count, &perf_results),
constexpr int requested_algo_count = 1; "GetConvolutionBackwardFilterAlgorithm failed");
int returned_algo_count; algo_ = perf_results.algo;
cudnnConvolutionBwdFilterAlgoPerf_t perf_results;
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnn_handle_, x_desc_real, dy_desc_, conv_desc_, dw_desc_,
requested_algo_count, &returned_algo_count, &perf_results),
"GetConvolutionBackwardFilterAlgorithm failed");
algo_ = perf_results.algo;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) { if (cudnn_data_type_ == CUDNN_DATA_HALF) {
algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1;
} }

@ -289,23 +289,15 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
[](const int64_t &value) { return static_cast<int>(value); }); [](const int64_t &value) { return static_cast<int>(value); });
} }
void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) { void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
if (group_ > 1 || CUDNN_MAJOR < 7) { constexpr int requested_algo_count = 1;
CHECK_CUDNN_RET_WITH_EXCEPT( int returned_algo_count = 0;
kernel_node_, cudnnConvolutionBwdDataAlgoPerf_t perf_results;
cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle_, w_desc_, dy_desc_, conv_desc_, dx_desc_real, CHECK_CUDNN_RET_WITH_EXCEPT(
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, 0, &algo_), kernel_node_,
"cudnnGetConvolutionBackwardDataAlgorithm failed"); cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnn_handle_, w_desc_, dy_desc_, conv_desc_, dx_desc_real,
} else { requested_algo_count, &returned_algo_count, &perf_results),
constexpr int requested_algo_count = 1; "cudnnGetConvolutionBackwardDataAlgorithm_v7 failed");
int returned_algo_count; algo_ = perf_results.algo;
cudnnConvolutionBwdDataAlgoPerf_t perf_results;
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnn_handle_, w_desc_, dy_desc_, conv_desc_, dx_desc_real,
requested_algo_count, &returned_algo_count, &perf_results),
"cudnnGetConvolutionBackwardDataAlgorithm_v7 failed");
algo_ = perf_results.algo;
}
if (cudnn_data_type_ == CUDNN_DATA_HALF) { if (cudnn_data_type_ == CUDNN_DATA_HALF) {
algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1;
} }

@ -125,12 +125,21 @@ class LstmGpuKernel : public GpuKernel {
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0), cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0),
"set dropout_desc failed"); "set dropout_desc failed");
cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
#if CUDNN_VERSION < 8000
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_, cudnnSetRNNDescriptor_v6(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
input_mode, direction, rnn_mode, algo, cudnn_data_type_), input_mode, direction, rnn_mode, algo, cudnn_data_type_),
"set rnn_desc failed"); "set rnn_desc failed");
cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed"); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed");
#else
cudnnMathType_t math_type = (cudnn_data_type_ == CUDNN_DATA_HALF) ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor_v8(rnn_desc_, algo, rnn_mode, bias_mode, direction, input_mode,
cudnn_data_type_, cudnn_data_type_, math_type, input_size_,
hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0),
"set rnn_desc failed");
#endif
auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T);
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,

@ -140,12 +140,21 @@ class LstmGradDataGpuKernel : public GpuKernel {
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0), cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0),
"set dropout_desc failed"); "set dropout_desc failed");
cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
#if CUDNN_VERSION < 8000
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_, cudnnSetRNNDescriptor_v6(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
input_mode, direction, rnn_mode, algo, cudnn_data_type_), input_mode, direction, rnn_mode, algo, cudnn_data_type_),
"set rnn_desc failed"); "set rnn_desc failed");
cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed"); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed");
#else
cudnnMathType_t math_type = (cudnn_data_type_ == CUDNN_DATA_HALF) ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor_v8(rnn_desc_, algo, rnn_mode, bias_mode, direction, input_mode,
cudnn_data_type_, cudnn_data_type_, math_type, input_size_,
hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0),
"set rnn_desc failed");
#endif
auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4); auto weight_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T);
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,

@ -114,13 +114,21 @@ class LstmGradWeightGpuKernel : public GpuKernel {
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0), cudnnSetDropoutDescriptor(dropout_desc_, handle_, dropout_, nullptr, 0, 0),
"set dropout_desc failed"); "set dropout_desc failed");
cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
#if CUDNN_VERSION < 8000
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_, cudnnSetRNNDescriptor_v6(handle_, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
input_mode, direction, rnn_mode, algo, cudnn_data_type_), input_mode, direction, rnn_mode, algo, cudnn_data_type_),
"set rnn_desc failed"); "set rnn_desc failed");
cudnnRNNBiasMode_t bias_mode = has_bias_ ? CUDNN_RNN_DOUBLE_BIAS : CUDNN_RNN_NO_BIAS;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed"); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnSetRNNBiasMode(rnn_desc_, bias_mode), "set bias_mode failed");
#else
cudnnMathType_t math_type = (cudnn_data_type_ == CUDNN_DATA_HALF) ? CUDNN_TENSOR_OP_MATH : CUDNN_FMA_MATH;
CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetRNNDescriptor_v8(rnn_desc_, algo, rnn_mode, bias_mode, direction, input_mode,
cudnn_data_type_, cudnn_data_type_, math_type, input_size_,
hidden_size_, hidden_size_, num_layers_, dropout_desc_, 0),
"set rnn_desc failed");
#endif
auto weight_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); auto weight_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T); size_t weight_size = weight_shape[0] * weight_shape[1] * weight_shape[2] * sizeof(T);

Loading…
Cancel
Save