From 944c9ec933105c548b3a499a9fcd4c9d88fd2333 Mon Sep 17 00:00:00 2001
From: VectorSL
Date: Thu, 30 Apr 2020 16:17:29 +0800
Subject: [PATCH] gpu tensoradd add shape validation check

---
 .../kernel/gpu/math/tensoradd_gpu_kernel.h    |  8 ++++
 .../ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h   | 40 +++++++++----------
 .../gpu/nn/conv2d_grad_filter_gpu_kernel.h    | 34 +++++++---------
 .../gpu/nn/conv2d_grad_input_gpu_kernel.h     | 36 ++++++++---------
 4 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
index 4dfbf4c3d4..f2029cf470 100644
--- a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
@@ -86,6 +86,14 @@ class TensorAddGpuFwdKernel : public GpuKernel {
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto input_shapeB = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    if (input_shape != output_shape && input_shapeB != output_shape) {
+      MS_LOG(ERROR) << "Double-sided broadcast is not supported by cudnnOpTensor:\n"
+                       "each dimension of inputA must match the corresponding dimension of the "
+                       "destination tensor outC, and each dimension of inputB must match the "
+                       "corresponding dimension of outC or be equal to 1.";
+      return false;
+    }
     is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_shapeB);
     if (is_null_input_) {
       MS_LOG(WARNING) << "TensorAddGpuFwdKernel input is null";
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
index e58aeacefb..6e218dddef 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
@@ -46,8 +46,6 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       pad_left_(0),
       n_(0),
       c_(0),
-      stride_(1),
-      dilation_(0),
       group_(1),
       is_null_input_(false),
       input_size_(0),
@@ -125,8 +123,8 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       pad_width_ = 0;
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[2], stride_[3], dilation_[2],
+                                      dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
       "cudnnSetConvolution2dDescriptor failed");
     input_descriptor_real = input_desc_;
   }
@@ -226,10 +224,10 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, n_, c_,
                                                            old_height_ + pad_height_, old_width_ + pad_width_),
                                 "cudnnSetTensor4dDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
-      "cudnnSetConvolution2dDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor(
+                                  conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[2], stride_[3],
+                                  dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+                                "cudnnSetConvolution2dDescriptor failed");
   }

   void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,
@@ -269,22 +267,20 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     }
   }
   void SetStrideAndDilation(const CNodePtr &kernel_node) {
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
-      MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!";
+    stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_.size() != 4) {
+      MS_LOG(EXCEPTION) << "Conv2d's stride must be 4d!";
     }
-    if (stride_ori[0] != 1 || stride_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!";
+    if (stride_[0] != 1 || stride_[1] != 1) {
+      MS_LOG(EXCEPTION) << "Conv2d stride only supports 1 in N axis and C axis!";
     }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "conv2d only support equal dilation, and dilation must be 4d!";
+    if (dilation_.size() != 4) {
+      MS_LOG(EXCEPTION) << "Conv2d's dilation must be 4d!";
     }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!";
+    if (dilation_[0] != 1 || dilation_[1] != 1) {
+      MS_LOG(EXCEPTION) << "Conv2d dilation only supports 1 in N axis and C axis!";
     }
-    stride_ = stride_ori[2];
-    dilation_ = dilation_ori[2];
   }
   cudnnHandle_t cudnn_handle_;
   cudnnTensorDescriptor_t input_desc_;
@@ -307,8 +303,8 @@ class Conv2dGpuFwdKernel : public GpuKernel {
   int pad_left_;
   int n_;
   int c_;
-  int stride_;
-  int dilation_;
+  std::vector<int> stride_;
+  std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
   size_t input_size_;
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
index b9d74b036e..7db4e115df 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
@@ -46,8 +46,6 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       pad_left_(0),
       n_(0),
       c_(0),
-      stride_(1),
-      dilation_(0),
       group_(1),
       is_null_input_(false),
       input_size_(0),
@@ -128,8 +126,8 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       pad_width_ = 0;
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[0], stride_[1], dilation_[2],
+                                      dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
      "GetConvolution2dDescriptor failed");
     x_desc_real = x_desc_;
   }
@@ -229,10 +227,10 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
                                                            n_, c_, old_height_ + pad_height_, old_width_ + pad_width_),
                                 "cudnnSetTensor4dDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
-      "cudnnSetConvolution2dDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor(
+                                  conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[0], stride_[1],
+                                  dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+                                "cudnnSetConvolution2dDescriptor failed");
   }
   void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
     if (group_ > 1 || CUDNN_MAJOR < 7) {
@@ -277,19 +275,17 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
                                 "SetTensor4dDescriptor failed");
   }
   void SetStrideAndDilation(const CNodePtr &kernel_node) {
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal stride, and stride must be 2d!";
+    stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_.size() != 2) {
+      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's stride must be 2d!";
     }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal dilation, and dilation must be 4d!";
+    if (dilation_.size() != 4) {
+      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's dilation must be 4d!";
     }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
+    if (dilation_[0] != 1 || dilation_[1] != 1) {
       MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!";
     }
-    stride_ = stride_ori[0];
-    dilation_ = dilation_ori[2];
   }
   cudnnHandle_t cudnn_handle_;
   cudnnFilterDescriptor_t dw_desc_;
@@ -312,8 +308,8 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
   int pad_left_;
   int n_;
   int c_;
-  int stride_;
-  int dilation_;
+  std::vector<int> stride_;
+  std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
   size_t input_size_;
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
index a1fb7f324f..bdd82ad575 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
@@ -46,8 +46,6 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
       pad_left_(0),
       n_(0),
       c_(0),
-      stride_(1),
-      dilation_(0),
       group_(1),
       is_null_input_(false),
       input_size_(0),
@@ -84,7 +82,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
         cudnnConvolutionBackwardData(cudnn_handle_, &alpha, w_desc_, w, dy_desc_, dy, conv_desc_, algo_, work_space,
                                      workspace_size_, &beta, padded_descriptor_, padded),
         "ConvolutionBackwardData failed");
-      CalPadGrad(padded_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
+      CalPadGrad(input_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
                  old_width_ + pad_width_, pad_top_, pad_left_, dx, reinterpret_cast<cudaStream_t>(stream_ptr));
     } else {
       CHECK_CUDNN_RET_WITH_EXCEPT(
@@ -129,8 +127,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
       pad_width_ = 0;
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[0], stride_[1], dilation_[2],
+                                      dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
       "cudnnSetConvolution2dDescriptor failed");
     dx_desc_real = dx_desc_;
   }
@@ -229,10 +227,10 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
                                                            n_, c_, old_height_ + pad_height_, old_width_ + pad_width_),
                                 "cudnnSetTensor4dDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
-      "cudnnSetConvolution2dDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor(
+                                  conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[0], stride_[1],
+                                  dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+                                "cudnnSetConvolution2dDescriptor failed");
   }
   void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
     if (group_ > 1 || CUDNN_MAJOR < 7) {
@@ -275,19 +273,17 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
                                 "SetTensor4dDescriptor failed");
   }
   void SetStrideAndDilation(const CNodePtr &kernel_node) {
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal stride, and stride must be 2d!";
+    stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_.size() != 2) {
+      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's stride must be 2d!";
     }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal dilation, and dilation must be 4d!";
+    if (dilation_.size() != 4) {
+      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's dilation must be 4d!";
     }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
+    if (dilation_[0] != 1 || dilation_[1] != 1) {
       MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!";
     }
-    stride_ = stride_ori[0];
-    dilation_ = dilation_ori[2];
   }
   cudnnHandle_t cudnn_handle_;
   cudnnFilterDescriptor_t w_desc_;
@@ -309,8 +305,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
   int pad_left_;
   int n_;
   int c_;
-  int stride_;
-  int dilation_;
+  std::vector<int> stride_;
+  std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
   size_t input_size_;