!12605 Fix GPU ReduceMax/Sum float64 type failure

From: @TFbunny
Reviewed-by: @tom__chen
Signed-off-by:
pull/12605/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit d8433e9f5c

@ -50,8 +50,8 @@ class ArrayReduceGpuKernel : public GpuKernel {
T *output_addr = GetDeviceAddress<T>(outputs, 0); T *output_addr = GetDeviceAddress<T>(outputs, 0);
T *workspace_addr = GetDeviceAddress<T>(workspace, 0); T *workspace_addr = GetDeviceAddress<T>(workspace, 0);
const float alpha = 1; T alpha = static_cast<T>(1.0f);
const float beta = 0; T beta = static_cast<T>(0.0f);
if (all_match_) { if (all_match_) {
MS_LOG(DEBUG) MS_LOG(DEBUG)
<< "The corresponding dimensions of the input and output tensors all match. No need to call cuDNN kernel."; << "The corresponding dimensions of the input and output tensors all match. No need to call cuDNN kernel.";
@ -60,11 +60,21 @@ class ArrayReduceGpuKernel : public GpuKernel {
reinterpret_cast<cudaStream_t>(stream_ptr)), reinterpret_cast<cudaStream_t>(stream_ptr)),
"cudaMemcpyAsync failed in ArrayReduceGpuKernel::Launch."); "cudaMemcpyAsync failed in ArrayReduceGpuKernel::Launch.");
} else { } else {
CHECK_CUDNN_RET_WITH_EXCEPT( if (data_type_ == CUDNN_DATA_DOUBLE) {
kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(
cudnnReduceTensor(cudnn_handle_, reduce_tensor_descriptor_, nullptr, 0, workspace_addr, workspace_size_, &alpha, kernel_node_,
inputA_descriptor_, input_addr, &beta, outputC_descriptor_, output_addr), cudnnReduceTensor(cudnn_handle_, reduce_tensor_descriptor_, nullptr, 0, workspace_addr, workspace_size_,
"cudnnReduceTensor failed."); &alpha, inputA_descriptor_, input_addr, &beta, outputC_descriptor_, output_addr),
"cudnnReduceTensor failed.");
} else {
const float alphaf = static_cast<float>(alpha);
const float betaf = static_cast<float>(beta);
CHECK_CUDNN_RET_WITH_EXCEPT(
kernel_node_,
cudnnReduceTensor(cudnn_handle_, reduce_tensor_descriptor_, nullptr, 0, workspace_addr, workspace_size_,
&alphaf, inputA_descriptor_, input_addr, &betaf, outputC_descriptor_, output_addr),
"cudnnReduceTensor failed.");
}
} }
return true; return true;
} }
@ -194,12 +204,12 @@ class ArrayReduceGpuKernel : public GpuKernel {
MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported."; MS_LOG(EXCEPTION) << "Array reduce kernel type " << kernel_name << " is not supported.";
} }
reduce_tensor_op_ = iter->second; reduce_tensor_op_ = iter->second;
// Select the reduction compute type: double for float64 tensors, float for all other data types.
CHECK_CUDNN_RET_WITH_EXCEPT( cudnnDataType_t comp_type = (data_type_ == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
kernel_node_, CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
cudnnSetReduceTensorDescriptor(reduce_tensor_descriptor_, reduce_tensor_op_, CUDNN_DATA_FLOAT, nan_prop_, cudnnSetReduceTensorDescriptor(reduce_tensor_descriptor_, reduce_tensor_op_, comp_type,
reduce_indices_, CUDNN_32BIT_INDICES), nan_prop_, reduce_indices_, CUDNN_32BIT_INDICES),
"cudnnSetReduceTensorDescriptor failed"); "cudnnSetReduceTensorDescriptor failed");
return; return;
} }
void InferInAndOutDesc(const std::vector<size_t> &input_shape, const std::vector<size_t> &output_shape) { void InferInAndOutDesc(const std::vector<size_t> &input_shape, const std::vector<size_t> &output_shape) {

@ -237,3 +237,12 @@ def test_reduce_max_float64():
error = np.ones(shape=expect.shape) * 1.0e-5 error = np.ones(shape=expect.shape) * 1.0e-5
assert np.all(diff < error) assert np.all(diff < error)
assert output.shape == expect.shape assert output.shape == expect.shape
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
net = ReduceMaxTypeNet(np.float64)
output = net()
expect = np.max(x0, axis=axis0, keepdims=keep_dims0).astype(np.float64)
diff = abs(output.asnumpy() - expect)
error = np.ones(shape=expect.shape) * 1.0e-5
assert np.all(diff < error)
assert output.shape == expect.shape

@ -329,3 +329,12 @@ def test_reduce_sum_float64():
error = np.ones(shape=expect.shape) * 1.0e-5 error = np.ones(shape=expect.shape) * 1.0e-5
assert np.all(diff < error) assert np.all(diff < error)
assert output.shape == expect.shape assert output.shape == expect.shape
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
net = ReduceSumTypeNet(np.float64)
output = net()
expect = np.sum(x0, axis=axis0, keepdims=keep_dims0).astype(np.float64)
diff = abs(output.asnumpy() - expect)
error = np.ones(shape=expect.shape) * 1.0e-5
assert np.all(diff < error)
assert output.shape == expect.shape

Loading…
Cancel
Save