From 944c9ec933105c548b3a499a9fcd4c9d88fd2333 Mon Sep 17 00:00:00 2001
From: VectorSL
Date: Thu, 30 Apr 2020 16:17:29 +0800
Subject: [PATCH] gpu tensoradd add shape validation check

---
 .../kernel/gpu/math/tensoradd_gpu_kernel.h    |  8 ++++
 .../ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h   | 40 +++++++++----------
 .../gpu/nn/conv2d_grad_filter_gpu_kernel.h    | 34 +++++++---------
 .../gpu/nn/conv2d_grad_input_gpu_kernel.h     | 36 ++++++++---------
 4 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
index 4dfbf4c3d4..f2029cf470 100644
--- a/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/math/tensoradd_gpu_kernel.h
@@ -86,6 +86,14 @@ class TensorAddGpuFwdKernel : public GpuKernel {
     }
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto input_shapeB = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
+    auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
+    if (input_shape != output_shape && input_shapeB != output_shape) {
+      MS_LOG(ERROR) << "Double-sided broadcast is not supported by cudnnOpTensor:\n"
+                       "each dimension of inputA must match the corresponding dimension of the "
+                       "destination tensor outC, and each dimension of inputB must match the "
+                       "corresponding dimension of outC or be equal to 1.";
+      return false;
+    }
     is_null_input_ = CHECK_NULL_INPUT(input_shape) || CHECK_NULL_INPUT(input_shapeB);
     if (is_null_input_) {
       MS_LOG(WARNING) << "TensorAddGpuFwdKernel input is null";
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
index e58aeacefb..6e218dddef 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h
@@ -46,8 +46,6 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       pad_left_(0),
       n_(0),
       c_(0),
-      stride_(1),
-      dilation_(0),
       group_(1),
       is_null_input_(false),
       input_size_(0),
@@ -125,8 +123,8 @@ class Conv2dGpuFwdKernel : public GpuKernel {
       pad_width_ = 0;
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[2], stride_[3], dilation_[2],
+                                      dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
       "cudnnSetConvolution2dDescriptor failed");
     input_descriptor_real = input_desc_;
   }
@@ -226,10 +224,10 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_desc_, CUDNN_TENSOR_NCHW, cudnn_data_type_, n_, c_,
                                                            old_height_ + pad_height_, old_width_ + pad_width_),
                                 "cudnnSetTensor4dDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
-      "cudnnSetConvolution2dDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor(
+                                  conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[2], stride_[3],
+                                  dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+                                "cudnnSetConvolution2dDescriptor failed");
   }

   void Set4DDesc(const std::vector<size_t> &in_shape, const std::vector<size_t> &filter_shape,
@@ -269,22 +267,20 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     }
   }
   void SetStrideAndDilation(const CNodePtr &kernel_node) {
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) {
-      MS_LOG(EXCEPTION) << "conv2d only support equal stride, and stride must be 4d!";
+    stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_.size() != 4) {
+      MS_LOG(EXCEPTION) << "Conv2d's stride must be 4d!";
     }
-    if (stride_ori[0] != 1 || stride_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "conv2d stride only support 1 in N axis and C axis!";
+    if (stride_[0] != 1 || stride_[1] != 1) {
+      MS_LOG(EXCEPTION) << "Conv2d stride only supports 1 in N axis and C axis!";
     }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "conv2d only support equal dilation, and dilation must be 4d!";
+    if (dilation_.size() != 4) {
+      MS_LOG(EXCEPTION) << "Conv2d's dilation must be 4d!";
     }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
-      MS_LOG(EXCEPTION) << "conv2d dilation only support 1 in N axis and C axis!";
+    if (dilation_[0] != 1 || dilation_[1] != 1) {
+      MS_LOG(EXCEPTION) << "Conv2d dilation only supports 1 in N axis and C axis!";
     }
-    stride_ = stride_ori[2];
-    dilation_ = dilation_ori[2];
   }
   cudnnHandle_t cudnn_handle_;
   cudnnTensorDescriptor_t input_desc_;
@@ -307,8 +303,8 @@ class Conv2dGpuFwdKernel : public GpuKernel {
   int pad_left_;
   int n_;
   int c_;
-  int stride_;
-  int dilation_;
+  std::vector<int> stride_;
+  std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
   size_t input_size_;
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
index b9d74b036e..7db4e115df 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h
@@ -46,8 +46,6 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       pad_left_(0),
       n_(0),
       c_(0),
-      stride_(1),
-      dilation_(0),
       group_(1),
       is_null_input_(false),
       input_size_(0),
@@ -128,8 +126,8 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
       pad_width_ = 0;
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[0], stride_[1], dilation_[2],
+                                      dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
      "GetConvolution2dDescriptor failed");
     x_desc_real = x_desc_;
   }
@@ -229,10 +227,10 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
                                                            n_, c_, old_height_ + pad_height_, old_width_ + pad_width_),
                                 "cudnnSetTensor4dDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
-      "cudnnSetConvolution2dDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor(
+                                  conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[0], stride_[1],
+                                  dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+                                "cudnnSetConvolution2dDescriptor failed");
   }
   void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) {
     if (group_ > 1 || CUDNN_MAJOR < 7) {
@@ -277,19 +275,17 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
                                 "SetTensor4dDescriptor failed");
   }
   void SetStrideAndDilation(const CNodePtr &kernel_node) {
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal stride, and stride must be 2d!";
+    stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_.size() != 2) {
+      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's stride must be 2d!";
     }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel only support equal dilation, and dilation must be 4d!";
+    if (dilation_.size() != 4) {
+      MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel's dilation must be 4d!";
     }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
+    if (dilation_[0] != 1 || dilation_[1] != 1) {
       MS_LOG(EXCEPTION) << "ConvGradFilterGpuBkwKernel dilation only support 1 in N axis and C axis!";
     }
-    stride_ = stride_ori[0];
-    dilation_ = dilation_ori[2];
   }
   cudnnHandle_t cudnn_handle_;
   cudnnFilterDescriptor_t dw_desc_;
@@ -312,8 +308,8 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
   int pad_left_;
   int n_;
   int c_;
-  int stride_;
-  int dilation_;
+  std::vector<int> stride_;
+  std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
   size_t input_size_;
diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
index a1fb7f324f..bdd82ad575 100644
--- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h
@@ -46,8 +46,6 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
       pad_left_(0),
       n_(0),
       c_(0),
-      stride_(1),
-      dilation_(0),
       group_(1),
       is_null_input_(false),
       input_size_(0),
@@ -84,7 +82,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
         cudnnConvolutionBackwardData(cudnn_handle_, &alpha, w_desc_, w, dy_desc_, dy, conv_desc_, algo_, work_space,
                                      workspace_size_, &beta, padded_descriptor_, padded),
         "ConvolutionBackwardData failed");
-      CalPadGrad(padded_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
+      CalPadGrad(input_size_ / sizeof(T), padded, n_, c_, old_height_, old_width_, old_height_ + pad_height_,
                  old_width_ + pad_width_, pad_top_, pad_left_, dx, reinterpret_cast<cudaStream_t>(stream_ptr));
     } else {
       CHECK_CUDNN_RET_WITH_EXCEPT(
@@ -129,8 +127,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
       pad_width_ = 0;
     }
     CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_,
-                                      CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+      cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_[0], stride_[1], dilation_[2],
+                                      dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
       "cudnnSetConvolution2dDescriptor failed");
     dx_desc_real = dx_desc_;
   }
@@ -229,10 +227,10 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetTensor4dDescriptor(padded_descriptor_, CUDNN_TENSOR_NCHW, cudnn_data_type_,
                                                            n_, c_, old_height_ + pad_height_, old_width_ + pad_width_),
                                 "cudnnSetTensor4dDescriptor failed");
-    CHECK_CUDNN_RET_WITH_EXCEPT(
-      cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_,
-                                      dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
-      "cudnnSetConvolution2dDescriptor failed");
+    CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolution2dDescriptor(
+                                  conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_[0], stride_[1],
+                                  dilation_[2], dilation_[3], CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT),
+                                "cudnnSetConvolution2dDescriptor failed");
   }
   void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) {
     if (group_ > 1 || CUDNN_MAJOR < 7) {
@@ -275,19 +273,17 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
                                 "SetTensor4dDescriptor failed");
   }
   void SetStrideAndDilation(const CNodePtr &kernel_node) {
-    auto stride_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
-    auto dilation_ori = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
-    if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal stride, and stride must be 2d!";
+    stride_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "stride");
+    dilation_ = AnfAlgo::GetNodeAttr<std::vector<int>>(kernel_node, "dilation");
+    if (stride_.size() != 2) {
+      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's stride must be 2d!";
     }
-    if (dilation_ori.size() != 4 || dilation_ori[2] != dilation_ori[3]) {
-      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel only support equal dilation, and dilation must be 4d!";
+    if (dilation_.size() != 4) {
+      MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel's dilation must be 4d!";
     }
-    if (dilation_ori[0] != 1 || dilation_ori[1] != 1) {
+    if (dilation_[0] != 1 || dilation_[1] != 1) {
       MS_LOG(EXCEPTION) << "ConvGradInputGpuBkwKernel dilation only support 1 in N axis and C axis!";
     }
-    stride_ = stride_ori[0];
-    dilation_ = dilation_ori[2];
   }
   cudnnHandle_t cudnn_handle_;
   cudnnFilterDescriptor_t w_desc_;
@@ -309,8 +305,8 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
   int pad_left_;
   int n_;
   int c_;
-  int stride_;
-  int dilation_;
+  std::vector<int> stride_;
+  std::vector<int> dilation_;
   int group_;
   bool is_null_input_;
   size_t input_size_;