MD lite build failed in arm when build in X86_64

4 years ago · befb3518e4
parent ed940a5b64
commit befb3518e4
2 changed files with 141 additions and 118 deletions
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc
@ -18,12 +18,9 @@
 #include <limits>
 #include <algorithm>
 #include <cmath>
-#ifdef ENABLE_ANDROID
-#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
-#define USE_NEON
+#ifdef ENABLE_NEON
 #include <arm_neon.h>
 #endif
-#endif

 namespace mindspore {
 namespace dataset {
@ -333,7 +330,7 @@ inline void SubtractImpl(const T *src0, const T *src1, T *dst, int64_t total_siz
 template <>
 inline void SubtractImpl(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int64_t total_size) {
  int64_t x = 0;
-#ifdef USE_NEON
+#ifdef ENABLE_NEON
  const int64_t step = 32;
  for (; x <= total_size - step; x += step) {
    uint8x16_t v_src00 = vld1q_u8(src0 + x);
@ -432,7 +429,7 @@ bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
  return true;
 }

-#ifdef USE_NEON
+#ifdef ENABLE_NEON
 inline float32x4_t reciprocal_simd(float32x4_t val) {
  // get an initial estimate of 1/val
  float32x4_t reciprocal = vrecpeq_f32(val);
@ -461,7 +458,7 @@ inline void DivideImpl(const T *src0, const T *src1, T *dst, int64_t total_size)
 template <>
 inline void DivideImpl(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int64_t total_size) {
  int64_t x = 0;
-#ifdef USE_NEON
+#ifdef ENABLE_NEON
  const int64_t step = 16;
  for (; x <= total_size - step; x += step) {
    __builtin_prefetch(reinterpret_cast<const char *>(src0 + x) + 32 * 10);
@ -558,7 +555,7 @@ inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst

 bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
  if (!CheckDivide(src_a, src_b, dst)) {
-    return true;
+    return false;
  }

  if (dst->IsEmpty()) {
@ -606,7 +603,7 @@ inline void MultiplyImpl(const T *src0, const T *src1, T *dst, int64_t total_siz
 template <>
 inline void MultiplyImpl(const uint8_t *src0, const uint8_t *src1, uint8_t *dst, int64_t total_size) {
  int64_t x = 0;
-#ifdef USE_NEON
+#ifdef ENABLE_NEON
  const int64_t step = 32;
  for (; x <= total_size - step; x += step) {
    uint8x16_t v_src00 = vld1q_u8(src0 + x);
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc