!12881 add GaussianBlur and Canny for lite_cv

From: @tiancixiao Reviewed-by: Signed-off-by:
4 years ago · 9baec79337
parent 2b01887371 449452c7ef
commit 9baec79337
6 changed files with 978 additions and 15 deletions
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/CMakeLists.txt
@ -3,4 +3,6 @@ set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE
 add_library(lite-cv OBJECT
            image_process.cc
            warp_affine.cc
+            gaussian_blur.cc
+            canny.cc
            lite_mat.cc)
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/gaussian_blur.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/gaussian_blur.cc
@ -0,0 +1,90 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <math.h>
+
+#include "lite_cv/lite_mat.h"
+#include "lite_cv/image_process.h"
+
+#ifdef ENABLE_ANDROID
+#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
+#define USE_NEON
+#include <arm_neon.h>
+#endif
+#endif
+
+namespace mindspore {
+namespace dataset {
+
+static void GetGaussianKernel(float *kernel, int size, double sigma) {
+  int n = (size - 1) / 2;
+  std::vector<float> buffer(n);
+  float sum = 0;
+  for (int i = 0; i < n; i++) {
+    int x = i - n;
+    float g = exp(-0.5 * x * x / (sigma * sigma));
+    buffer[i] = g;
+    sum += g;
+  }
+  sum = sum * 2 + 1;
+  if ((size & 1) == 0) {
+    sum += 1;
+  }
+
+  float scale = 1. / sum;
+  float *ptr = kernel;
+  for (int i = 0; i < n; i++) {
+    float g = buffer[i] * scale;
+    ptr[i] = g;
+    ptr[size - 1 - i] = g;
+  }
+  ptr[n] = scale;
+  if ((size & 1) == 0) {
+    ptr[n + 1] = scale;
+  }
+}
+
+bool GaussianBlur(const LiteMat &src, LiteMat &dst, const std::vector<int> &ksize, double sigmaX,  // NOLINT
+                  double sigmaY, PaddBorderType pad_type) {
+  if (src.IsEmpty() || src.data_type_ != LDataType::UINT8) {
+    return false;
+  }
+  if (ksize.size() != 2 || ksize[0] <= 0 || ksize[1] <= 0 || ksize[0] % 2 != 1 || ksize[1] % 2 != 1) {
+    return false;
+  }
+  if (sigmaX <= 0) {
+    return false;
+  }
+  if (sigmaY <= 0) {
+    sigmaY = sigmaX;
+  }
+  if (ksize[0] == 1 && ksize[1] == 1) {
+    dst = src;
+    return true;
+  }
+
+  LiteMat kx, ky;
+  kx.Init(ksize[0], 1, 1, LDataType::FLOAT32);
+  ky.Init(1, ksize[1], 1, LDataType::FLOAT32);
+
+  GetGaussianKernel(kx, ksize[0], sigmaX);
+  GetGaussianKernel(ky, ksize[1], sigmaY);
+
+  return ConvRowCol(src, kx, ky, dst, src.data_type_, pad_type);
+}
+
+}  // namespace dataset
+}  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@ -17,10 +17,10 @@
 #include "minddata/dataset/kernels/image/lite_cv/image_process.h"

 #include <float.h>
-#include <math.h>
 #include <limits.h>
 #include <string.h>
 #include <cmath>
+#include <limits>
 #include <vector>
 #include <utility>
 #include <random>
@ -29,19 +29,12 @@
 #include <arm_neon.h>
 #endif

-#ifdef ENABLE_NEON
 #define R2GRAY 9798
 #define G2GRAY 19235
 #define B2GRAY 3735
 #define GRAYSHIFT 15
 #define GRAYSHIFT_DELTA (1 << (GRAYSHIFT - 1))
 #define U32TOU8CAST(value) ((uint8_t)std::min(value, (uint32_t)UCHAR_MAX))
-#else
-#define R2GRAY 77
-#define G2GRAY 150
-#define B2GRAY 29
-#define GRAYSHIFT 8
-#endif

 #define YSCALE 0x0101
 #define UTOB (-128)
@ -247,6 +240,125 @@ static void ResizeBilinear1C(const unsigned char *src, int src_width, int src_he
  delete[] data_buf;
 }

+static inline uint8_t clip(float value, int min = 0, int max = 255) {
+  int int_val = roundf(value);
+  return std::max<int32_t>(std::numeric_limits<uint8_t>::min(),
+                           std::min<int32_t>(std::numeric_limits<uint8_t>::max(), int_val));
+}
+
+template <typename T1, typename T2>
+static bool Conv2DImplement(const LiteMat &src, const LiteMat &kernel, T2 *dst, LDataType dst_type,
+                            PaddBorderType pad_type) {
+  int border_x = static_cast<int>(kernel.width_ / 2);
+  int border_y = static_cast<int>(kernel.height_ / 2);
+
+  LiteMat pad_mat;
+  pad_mat.Init(src.width_ + 2 * border_x, src.height_ + 2 * border_y, src.channel_, src.data_type_);
+
+  if (!Pad(src, pad_mat, border_y, border_y, border_x, border_x, pad_type)) {
+    return false;
+  }
+
+  const T1 *pad_ptr = pad_mat;
+  const float *kernel_ptr = kernel;
+  T2 *dst_ptr = dst;
+
+  int pad_step = pad_mat.width_ * pad_mat.channel_;
+  int dst_step = src.width_ * src.channel_;
+
+  if (src.channel_ == 1) {
+    for (int y = border_y; y < pad_mat.height_ - border_y; y++) {
+      for (int x = border_x; x < pad_mat.width_ - border_x; x++) {
+        float conv_sum = 0;
+        for (int i = -border_y; i < -border_y + kernel.height_; i++) {
+          for (int j = -border_x; j < -border_x + kernel.width_; j++) {
+            conv_sum += pad_ptr[(y + i) * pad_step + (x + j) * pad_mat.channel_] *
+                        kernel_ptr[(i + border_y) * kernel.width_ + (j + border_x)];
+          }
+        }
+        if (dst_type == LDataType::UINT8) {
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_] = clip(conv_sum);
+        } else {
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_] = conv_sum;
+        }
+      }
+    }
+  } else if (src.channel_ == 3) {
+    for (int y = border_y; y < pad_mat.height_ - border_y; y++) {
+      for (int x = border_x; x < pad_mat.width_ - border_x; x++) {
+        float conv_sum_b = 0;
+        float conv_sum_g = 0;
+        float conv_sum_r = 0;
+        for (int i = -border_y; i < -border_y + kernel.height_; i++) {
+          for (int j = -border_x; j < -border_x + kernel.width_; j++) {
+            conv_sum_b += pad_ptr[(y + i) * pad_step + (x + j) * pad_mat.channel_] *
+                          kernel_ptr[(i + border_y) * kernel.width_ + (j + border_x)];
+            conv_sum_g += pad_ptr[(y + i) * pad_step + (x + j) * pad_mat.channel_ + 1] *
+                          kernel_ptr[(i + border_y) * kernel.width_ + (j + border_x)];
+            conv_sum_r += pad_ptr[(y + i) * pad_step + (x + j) * pad_mat.channel_ + 2] *
+                          kernel_ptr[(i + border_y) * kernel.width_ + (j + border_x)];
+          }
+        }
+        if (dst_type == LDataType::UINT8) {
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_] = clip(conv_sum_b);
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_ + 1] = clip(conv_sum_g);
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_ + 2] = clip(conv_sum_r);
+        } else {
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_] = conv_sum_b;
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_ + 1] = conv_sum_g;
+          dst_ptr[(y - border_y) * dst_step + (x - border_x) * src.channel_ + 2] = conv_sum_r;
+        }
+      }
+    }
+  } else {
+    return false;
+  }
+  return true;
+}
+
+bool Conv2D(const LiteMat &src, const LiteMat &kernel, LiteMat &dst, LDataType dst_type, PaddBorderType pad_type) {
+  if (src.IsEmpty() || kernel.IsEmpty()) {
+    return false;
+  }
+  if ((dst_type != LDataType::UINT8 && dst_type != LDataType::FLOAT32) || kernel.data_type_ != LDataType::FLOAT32) {
+    return false;
+  }
+  if (dst.IsEmpty() || dst.width_ != src.width_ || dst.height_ != src.height_ || dst.channel_ != src.channel_ ||
+      dst.data_type_ != dst_type) {
+    dst.Init(src.width_, src.height_, src.channel_, dst_type);
+  }
+
+  if (src.data_type_ == LDataType::UINT8 && dst.data_type_ == LDataType::UINT8) {
+    return Conv2DImplement<uint8_t, uint8_t>(src, kernel, dst, dst_type, pad_type);
+  } else if (src.data_type_ == LDataType::UINT8 && dst.data_type_ == LDataType::FLOAT32) {
+    return Conv2DImplement<uint8_t, float>(src, kernel, dst, dst_type, pad_type);
+  } else if (src.data_type_ == LDataType::FLOAT32 && dst.data_type_ == LDataType::UINT8) {
+    return Conv2DImplement<float, uint8_t>(src, kernel, dst, dst_type, pad_type);
+  } else if (src.data_type_ == LDataType::FLOAT32 && dst.data_type_ == LDataType::FLOAT32) {
+    return Conv2DImplement<float, float>(src, kernel, dst, dst_type, pad_type);
+  } else {
+    return false;
+  }
+}
+
+bool ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMat &dst, LDataType dst_type,
+                PaddBorderType pad_type) {
+  if (src.IsEmpty() || kx.IsEmpty() || ky.IsEmpty()) {
+    return false;
+  }
+  if (dst_type != LDataType::UINT8 && dst_type != LDataType::FLOAT32) {
+    return false;
+  }
+  if (dst.IsEmpty() || dst.width_ != src.width_ || dst.height_ != src.height_ || dst.channel_ != src.channel_ ||
+      dst.data_type_ != dst_type) {
+    dst.Init(src.width_, src.height_, src.channel_, dst_type);
+  }
+
+  LiteMat mid;
+  bool ret = Conv2D(src, kx, mid, LDataType::FLOAT32, pad_type) && Conv2D(mid, ky, dst, dst_type, pad_type);
+  return ret;
+}
+
 bool ResizeBilinear(const LiteMat &src, LiteMat &dst, int dst_w, int dst_h) {
  if (dst_h <= 0 || dst_w <= 0) {
    return false;
@ -485,7 +597,7 @@ static bool ConvertRGBAToGRAY(const unsigned char *data, LDataType data_type, in
 #else
    for (int y = 0; y < h; y++) {
      for (int x = 0; x < w; x++) {
-        *ptr = (data_ptr[2] * B2GRAY + data_ptr[1] * G2GRAY + data_ptr[0] * R2GRAY) >> GRAYSHIFT;
+        *ptr = (data_ptr[2] * B2GRAY + data_ptr[1] * G2GRAY + data_ptr[0] * R2GRAY + GRAYSHIFT_DELTA) >> GRAYSHIFT;
        ptr++;
        data_ptr += 4;
      }
@ -763,6 +875,45 @@ static void PadWithConstant(const LiteMat &src, LiteMat &dst, const int top, con
  }
 }

+static int PadFromPos(int p, int len, PaddBorderType pad_type) {
+  if (p >= 0 && p < len) {
+    return p;
+  }
+  if (pad_type == PaddBorderType::PADD_BORDER_REPLICATE) {
+    return p < 0 ? 0 : len - 1;
+  } else {
+    return p < 0 ? -p : 2 * len - p - 2;
+  }
+}
+
+template <typename T>
+static void PadImplement(const LiteMat &src, LiteMat &dst, const int top, const int bottom, const int left,
+                         const int right, const PaddBorderType pad_type) {
+  int src_step = src.width_ * src.channel_;
+  int dst_step = dst.width_ * dst.channel_;
+
+  uint8_t *src_data_ptr = reinterpret_cast<uint8_t *>(src.data_ptr_);
+  uint8_t *dst_data_ptr = reinterpret_cast<uint8_t *>(dst.data_ptr_);
+  for (int i = 0; i < src.height_; i++) {
+    memcpy(dst_data_ptr + (i + top) * dst.steps_[0] + left * dst.steps_[1], src_data_ptr + i * src.steps_[0],
+           src.steps_[0]);
+  }
+
+  const T *src_ptr = src;
+  T *dst_ptr = dst;
+  for (int y = 0; y < dst.height_; y++) {
+    for (int x = 0; x < dst.width_; x++) {
+      if (y < top || y >= dst.height_ - bottom || x < left || x >= dst.width_ - right) {
+        int src_y = PadFromPos(y - top, src.height_, pad_type);
+        int src_x = PadFromPos(x - left, src.width_, pad_type);
+        for (int cn = 0; cn < dst.channel_; cn++) {
+          dst_ptr[y * dst_step + x * dst.channel_ + cn] = src_ptr[src_y * src_step + src_x * src.channel_ + cn];
+        }
+      }
+    }
+  }
+}
+
 template <typename T>
 void ExtractChannelImpl(const T *src_ptr, T *dst_ptr, int height, int width, int channel, int col) {
  int total = height * width;
@ -909,6 +1060,10 @@ bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int ri
    PadWithConstant<float>(src, dst, top, bottom, left, right, pad_type, fill_b_or_gray, fill_g, fill_r);
  } else if (pad_type == PADD_BORDER_CONSTANT && src.data_type_ == LDataType::UINT8) {
    PadWithConstant<uint8_t>(src, dst, top, bottom, left, right, pad_type, fill_b_or_gray, fill_g, fill_r);
+  } else if (src.data_type_ == LDataType::FLOAT32) {
+    PadImplement<float>(src, dst, top, bottom, left, right, pad_type);
+  } else if (src.data_type_ == LDataType::UINT8) {
+    PadImplement<uint8_t>(src, dst, top, bottom, left, right, pad_type);
  } else {
    return false;
  }
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h
@ -32,7 +32,12 @@ namespace dataset {
 #define INT16_CAST(X) \
  static_cast<int16_t>(::std::min(::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), -32768), 32767));

-enum PaddBorderType { PADD_BORDER_CONSTANT = 0, PADD_BORDER_REPLICATE = 1 };
+enum PaddBorderType {
+  PADD_BORDER_CONSTANT = 0,
+  PADD_BORDER_REPLICATE = 1,
+  PADD_BORDER_REFLECT_101 = 4,
+  PADD_BORDER_DEFAULT = PADD_BORDER_REFLECT_101
+};

 struct BoxesConfig {
 public:
@ -65,7 +70,7 @@ bool SubStractMeanNormalize(const LiteMat &src, LiteMat &dst, const std::vector<

 /// \brief padd image, the channel supports is 3 and 1
 bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int right, PaddBorderType pad_type,
-         uint8_t fill_b_or_gray, uint8_t fill_g, uint8_t fill_r);
+         uint8_t fill_b_or_gray = 0, uint8_t fill_g = 0, uint8_t fill_r = 0);

 /// \brief Extract image channel by index
 bool ExtractChannel(LiteMat &src, LiteMat &dst, int col);
@ -113,6 +118,25 @@ bool GetAffineTransform(std::vector<Point> src_point, std::vector<Point> dst_poi
 /// \brief Matrix transpose
 bool Transpose(LiteMat &src, LiteMat &dst);

+/// \brief Filter the image by a Gaussian kernel
+bool GaussianBlur(const LiteMat &src, LiteMat &dst, const std::vector<int> &ksize, double sigmaX, double sigmaY = 0.f,
+                  PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
+
+/// \brief Detect edges in an image
+bool Canny(const LiteMat &src, LiteMat &dst, double low_thresh, double high_thresh, int ksize = 3,
+           bool L2gradient = false);
+
+/// \brief Apply a 2D convolution over the image
+bool Conv2D(const LiteMat &src, const LiteMat &kernel, LiteMat &dst, LDataType dst_type,
+            PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
+
+/// \brief Applies a separable linear convolution over the image
+bool ConvRowCol(const LiteMat &src, const LiteMat &kx, const LiteMat &ky, LiteMat &dst, LDataType dst_type,
+                PaddBorderType pad_type = PaddBorderType::PADD_BORDER_DEFAULT);
+
+/// \brief Filter the image by a Sobel kernel
+bool Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize, PaddBorderType pad_type);
+
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // IMAGE_PROCESS_H_
--- a/tests/ut/cpp/dataset/image_process_test.cc
+++ b/tests/ut/cpp/dataset/image_process_test.cc