diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
index db2a88a927..cc6293e9bc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
@@ -50,8 +50,8 @@ class ArrayReduceGpuKernel : public GpuKernel {
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
     T *workspace_addr = GetDeviceAddress<T>(workspace, 0);
 
-    const float alpha = 1;
-    const float beta = 0;
+    T alpha = static_cast<T>(1.0f);
+    T beta = static_cast<T>(0.0f);
     if (all_match_) {
       MS_LOG(DEBUG)
         << "The corresponding dimensions of the input and output tensors all match. No need to call cuDNN kernel.";
@@ -60,11 +60,21 @@ class ArrayReduceGpuKernel : public GpuKernel {
                                                  reinterpret_cast<cudaStream_t>(stream_ptr)),
                                  "cudaMemcpyAsync failed in ArrayReduceGpuKernel::Launch.");
     } else {
-      CHECK_CUDNN_RET_WITH_EXCEPT(
-        kernel_node_,
-        cudnnReduceTensor(cudnn_handle_, reduce_tensor_descriptor_, nullptr, 0, workspace_addr, workspace_size_, &alpha,
-                          inputA_descriptor_, input_addr, &beta, outputC_descriptor_, output_addr),
-        "cudnnReduceTensor failed.");
+      if (data_type_ == CUDNN_DATA_DOUBLE) {
+        CHECK_CUDNN_RET_WITH_EXCEPT(
+          kernel_node_,
+          cudnnReduceTensor(cudnn_handle_, reduce_tensor_descriptor_, nullptr, 0, workspace_addr, workspace_size_,
+                            &alpha, inputA_descriptor_, input_addr, &beta, outputC_descriptor_, output_addr),
+          "cudnnReduceTensor failed.");
+      } else {
+        const float alphaf = static_cast<float>(alpha);
+        const float betaf = static_cast<float>(beta);
+        CHECK_CUDNN_RET_WITH_EXCEPT(
+          kernel_node_,
+          cudnnReduceTensor(cudnn_handle_, reduce_tensor_descriptor_, nullptr, 0, workspace_addr, workspace_size_,
+                            &alphaf, inputA_descriptor_, input_addr, &betaf, outputC_descriptor_, output_addr),
+          "cudnnReduceTensor failed.");
+      }
     }
     return true;
   }
@@ -194,12 +204,12 @@ class ArrayReduceGpuKernel : public GpuKernel {
       MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported.";
     }
     reduce_tensor_op_ = iter->second;
-
-    CHECK_CUDNN_RET_WITH_EXCEPT(
-      kernel_node_,
-      cudnnSetReduceTensorDescriptor(reduce_tensor_descriptor_, reduce_tensor_op_, CUDNN_DATA_FLOAT, nan_prop_,
-                                     reduce_indices_, CUDNN_32BIT_INDICES),
-      "cudnnSetReduceTensorDescriptor failed");
+    // add check for float64
+    cudnnDataType_t comp_type = (data_type_ == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
+    CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
+                                cudnnSetReduceTensorDescriptor(reduce_tensor_descriptor_, reduce_tensor_op_, comp_type,
+                                                               nan_prop_, reduce_indices_, CUDNN_32BIT_INDICES),
+                                "cudnnSetReduceTensorDescriptor failed");
     return;
   }
   void InferInAndOutDesc(const std::vector<size_t> &input_shape, const std::vector<size_t> &output_shape) {
diff --git a/tests/st/ops/gpu/test_reduce_max_op.py b/tests/st/ops/gpu/test_reduce_max_op.py
index 46943b4bbb..9180898d63 100644
--- a/tests/st/ops/gpu/test_reduce_max_op.py
+++ b/tests/st/ops/gpu/test_reduce_max_op.py
@@ -232,3 +232,12 @@ def test_reduce_max_float64():
     error = np.ones(shape=expect.shape) * 1.0e-5
     assert np.all(diff < error)
     assert output.shape == expect.shape
+
+    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
+    net = ReduceMaxTypeNet(np.float64)
+    output = net()
+    expect = np.max(x0, axis=axis0, keepdims=keep_dims0).astype(np.float64)
+    diff = abs(output.asnumpy() - expect)
+    error = np.ones(shape=expect.shape) * 1.0e-5
+    assert np.all(diff < error)
+    assert output.shape == expect.shape
diff --git a/tests/st/ops/gpu/test_reduce_sum_op.py b/tests/st/ops/gpu/test_reduce_sum_op.py
index 708deb2184..41e6ef7283 100644
--- a/tests/st/ops/gpu/test_reduce_sum_op.py
+++ b/tests/st/ops/gpu/test_reduce_sum_op.py
@@ -324,3 +324,12 @@ def test_reduce_sum_float64():
     error = np.ones(shape=expect.shape) * 1.0e-5
     assert np.all(diff < error)
     assert output.shape == expect.shape
+
+    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
+    net = ReduceSumTypeNet(np.float64)
+    output = net()
+    expect = np.sum(x0, axis=axis0, keepdims=keep_dims0).astype(np.float64)
+    diff = abs(output.asnumpy() - expect)
+    error = np.ones(shape=expect.shape) * 1.0e-5
+    assert np.all(diff < error)
+    assert output.shape == expect.shape
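
For reference, a minimal sketch (not part of the patch) of exercising the float64 GPU reduce path this change enables, calling the ReduceSum primitive directly in PyNative mode; the input shape and axes are illustrative:

import numpy as np
import mindspore.context as context
import mindspore.ops.operations as P
from mindspore import Tensor

context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
x = Tensor(np.random.rand(2, 3, 4).astype(np.float64))
# Reducing over axes (0, 1) goes through cudnnReduceTensor; with this patch the
# compute type is CUDNN_DATA_DOUBLE instead of CUDNN_DATA_FLOAT for double inputs.
out = P.ReduceSum(keep_dims=False)(x, (0, 1))
assert out.asnumpy().shape == (4,)
assert out.asnumpy().dtype == np.float64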