From 2a56bea0e58878a9240df37f415770ccda7d1363 Mon Sep 17 00:00:00 2001
From: Corleone <liuchao195@huawei.com>
Date: Fri, 18 Sep 2020 10:31:54 +0800
Subject: [PATCH] add arithmetic and scale opencl ci testcase

---
 .../runtime/kernel/opencl/cl/arithmetic.cl    | 28 +++++++++----------
 .../src/runtime/kernel/opencl/kernel/scale.cc | 25 ++++++-----------
 mindspore/lite/test/run_test.sh               |  4 +--
 .../runtime/kernel/opencl/arithmetic_tests.cc | 16 ++++++-----
 .../src/runtime/kernel/opencl/scale_tests.cc  | 18 ++++++------
 5 files changed, 44 insertions(+), 47 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl
index e01d0cdd4d..30357ba516 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl
@@ -116,7 +116,7 @@ __kernel void ElementFloorDiv_IMG(__read_only image2d_t input_a, __read_only ima
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
   FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), floor(a / b));
+  WRITE_IMAGE(output, (int2)(X, Y), floor(divide_no_check(a, b)));
 }
 
 __kernel void ElementFloorMod_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b,
@@ -155,7 +155,7 @@ __kernel void ElementEqual_IMG(__read_only image2d_t input_a, __read_only image2
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
   FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a == b));
+  WRITE_IMAGE(output, (int2)(X, Y), a == b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void ElementNotEqual_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b,
@@ -168,7 +168,7 @@ __kernel void ElementNotEqual_IMG(__read_only image2d_t input_a, __read_only ima
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
   FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a != b));
+  WRITE_IMAGE(output, (int2)(X, Y), a != b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void ElementLess_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b,
@@ -181,7 +181,7 @@ __kernel void ElementLess_IMG(__read_only image2d_t input_a, __read_only image2d
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
   FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a < b));
+  WRITE_IMAGE(output, (int2)(X, Y), a < b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void ElementLessEqual_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b,
@@ -194,7 +194,7 @@ __kernel void ElementLessEqual_IMG(__read_only image2d_t input_a, __read_only im
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
   FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a <= b));
+  WRITE_IMAGE(output, (int2)(X, Y), a <= b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void ElementGreater_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b,
@@ -207,7 +207,7 @@ __kernel void ElementGreater_IMG(__read_only image2d_t input_a, __read_only imag
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
   FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a > b));
+  WRITE_IMAGE(output, (int2)(X, Y), a > b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void ElementGreaterEqual_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b,
@@ -220,7 +220,7 @@ __kernel void ElementGreaterEqual_IMG(__read_only image2d_t input_a, __read_only
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
   FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a >= b));
+  WRITE_IMAGE(output, (int2)(X, Y), a >= b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void BroadcastAdd_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output,
@@ -327,7 +327,7 @@ __kernel void BroadcastFloorDiv_IMG(__read_only image2d_t input_a, float b, __wr
   }
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), floor(a / (FLT4)b));
+  WRITE_IMAGE(output, (int2)(X, Y), floor(divide_no_check(a, (FLT4)b)));
 }
 
 __kernel void BroadcastFloorMod_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output,
@@ -363,7 +363,7 @@ __kernel void BroadcastEqual_IMG(__read_only image2d_t input_a, float b, __write
   }
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a == (FLT4)b));
+  WRITE_IMAGE(output, (int2)(X, Y), a == (FLT4)b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void BroadcastNotEqual_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output,
@@ -375,7 +375,7 @@ __kernel void BroadcastNotEqual_IMG(__read_only image2d_t input_a, float b, __wr
   }
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a != (FLT4)b));
+  WRITE_IMAGE(output, (int2)(X, Y), a != (FLT4)b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void BroadcastLess_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output,
@@ -387,7 +387,7 @@ __kernel void BroadcastLess_IMG(__read_only image2d_t input_a, float b, __write_
   }
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a < (FLT4)b));
+  WRITE_IMAGE(output, (int2)(X, Y), a < (FLT4)b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void BroadcastLessEqual_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output,
@@ -399,7 +399,7 @@ __kernel void BroadcastLessEqual_IMG(__read_only image2d_t input_a, float b, __w
   }
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a <= (FLT4)b));
+  WRITE_IMAGE(output, (int2)(X, Y), a <= (FLT4)b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void BroadcastGreater_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output,
@@ -411,7 +411,7 @@ __kernel void BroadcastGreater_IMG(__read_only image2d_t input_a, float b, __wri
   }
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a > (FLT4)b));
+  WRITE_IMAGE(output, (int2)(X, Y), a > (FLT4)b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void BroadcastGreaterEqual_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output,
@@ -423,7 +423,7 @@ __kernel void BroadcastGreaterEqual_IMG(__read_only image2d_t input_a, float b,
   }
 
   FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y));
-  WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a >= (FLT4)b));
+  WRITE_IMAGE(output, (int2)(X, Y), a >= (FLT4)b ? (FLT4)1.f : (FLT4).0f);
 }
 
 __kernel void ElementAdd_BUF(__global float *input_a, __global float *input_b, __global float *output,
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc
index c329d4a502..97ccf03f68 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc
@@ -311,26 +311,19 @@ int ScaleOpenCLKernel::Run() {
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);
     ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset);
   } else {
-    if (in_tensors_[0]->data_type() == kNumberTypeFloat32) {
+    if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
       float scale = static_cast<float *>(in_tensors_[1]->data_c())[0];
       float offset = static_cast<float *>(in_tensors_[2]->data_c())[0];
       ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);
       ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset);
-    } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) {
-      if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
-        float scale = static_cast<float *>(in_tensors_[1]->data_c())[0];
-        float offset = static_cast<float *>(in_tensors_[2]->data_c())[0];
-        ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale));
-        ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset));
-      } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) {
-        float16_t scale = static_cast<float16_t *>(in_tensors_[1]->data_c())[0];
-        float16_t offset = static_cast<float16_t *>(in_tensors_[2]->data_c())[0];
-        ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale));
-        ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset));
-      } else {
-        MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type();
-        return RET_ERROR;
-      }
+    } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) {
+      float16_t scale = static_cast<float16_t *>(in_tensors_[1]->data_c())[0];
+      float16_t offset = static_cast<float16_t *>(in_tensors_[2]->data_c())[0];
+      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(scale));
+      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(offset));
+    } else {
+      MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type();
+      return RET_ERROR;
     }
   }
   ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
diff --git a/mindspore/lite/test/run_test.sh b/mindspore/lite/test/run_test.sh
index 3307238195..44ade02040 100755
--- a/mindspore/lite/test/run_test.sh
+++ b/mindspore/lite/test/run_test.sh
@@ -31,12 +31,10 @@ cp -fr $TEST_DATA_DIR/testPK ./data
 
 # for GPU OpenCL
 ./lite-test --gtest_filter="TestConvolutionOpenCL.simple_test*"
-
 ./lite-test --gtest_filter="TestArithmeticSelfOpenCLCI.ArithmeticSelfRound*"
 ./lite-test --gtest_filter="TestConcatOpenCLCI.ConcatFp32_2inputforCI*"
 ./lite-test --gtest_filter="TestSliceOpenCLfp32.Slicefp32CI*"
 ./lite-test --gtest_filter="TestBatchnormOpenCLCI.Batchnormfp32CI*"
-
 ./lite-test --gtest_filter="TestAvgPoolingOpenCL*"
 ./lite-test --gtest_filter="TestConv2dTransposeOpenCL*"
 ./lite-test --gtest_filter="TestMatMulOpenCL.MatMul2D*"
@@ -46,3 +44,5 @@ cp -fr $TEST_DATA_DIR/testPK ./data
 ./lite-test --gtest_filter="TestReshapeOpenCL*"
 ./lite-test --gtest_filter="TestSoftmaxOpenCL*"
 ./lite-test --gtest_filter="TestTransposeOpenCL*"
+./lite-test --gtest_filter="TestArithmeticOpenCL*"
+./lite-test --gtest_filter="TestScaleOpenCL*"
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
index 63bb2022b2..1408a50d8f 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc
@@ -67,6 +67,7 @@ static void LogData(void *data, const int size, const std::string prefix) {
 
 template <class T>
 static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) {
+  bool is_log_data = false;
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   auto allocator = ocl_runtime->GetAllocator();
 
@@ -126,7 +127,7 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   }
   std::vector<lite::Tensor *> outputs = {tensor_c};
 
-  ArithmeticParameter *param = new (std::nothrow) ArithmeticParameter();
+  ArithmeticParameter *param = static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
   param->broadcasting_ = is_bias_add;
   if (param == nullptr) {
     MS_LOG(ERROR) << "Create parameter failed!";
@@ -156,7 +157,7 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
     delete[] data_b;
     delete[] data_c_cpu;
     delete[] data_c_ocl;
-    delete param;
+    free(param);
     return;
   }
   arith_kernel->Init();
@@ -188,10 +189,12 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
 
   memcpy(data_c_ocl, outputs[0]->data_c(), sizeof(T) * element_num);
 
-  LogData<T>(data_a, 10, "Data A : ");
-  LogData<T>(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
-  LogData<T>(data_c_cpu, 10, "Expect compute : ");
-  LogData<T>(outputs[0]->data_c(), 10, "OpenCL compute : ");
+  if (is_log_data) {
+    LogData<T>(data_a, 10, "Data A : ");
+    LogData<T>(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : ");
+    LogData<T>(data_c_cpu, 10, "Expect compute : ");
+    LogData<T>(outputs[0]->data_c(), 10, "OpenCL compute : ");
+  }
   bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num);
   MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
   EXPECT_EQ(true, cmp);
@@ -203,7 +206,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   delete[] data_c_ocl;
 
   delete kernel;
-  delete param;
   for (auto tensor : inputs) {
     delete tensor;
   }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc
index 8224918397..1875aec574 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc
@@ -67,6 +67,7 @@ static void LogData(void *data, const int size, const std::string prefix) {
 
 template <class T>
 static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &shape_b) {
+  bool is_log_data = false;
   auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
   auto allocator = ocl_runtime->GetAllocator();
 
@@ -137,7 +138,7 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   }
   std::vector<lite::Tensor *> outputs = {tensor_out};
 
-  ScaleParameter *param = new (std::nothrow) ScaleParameter();
+  ScaleParameter *param = static_cast<ScaleParameter *>(malloc(sizeof(ScaleParameter)));
   if (param == nullptr) {
     MS_LOG(ERROR) << "Create parameter failed!";
     delete tensor_in;
@@ -170,7 +171,7 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
     delete[] data_offset;
     delete[] data_out_cpu;
     delete[] data_out_ocl;
-    delete param;
+    free(param);
     return;
   }
   scale_kernel->Init();
@@ -206,11 +207,13 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
 
   memcpy(data_out_ocl, outputs[0]->data_c(), sizeof(T) * element_num);
 
-  LogData<T>(data_in, 10, "Data input : ");
-  LogData<T>(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : ");
-  LogData<T>(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : ");
-  LogData<T>(data_out_cpu, 10, "Expect compute : ");
-  LogData<T>(outputs[0]->data_c(), 10, "OpenCL compute : ");
+  if (is_log_data) {
+    LogData<T>(data_in, 10, "Data input : ");
+    LogData<T>(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : ");
+    LogData<T>(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : ");
+    LogData<T>(data_out_cpu, 10, "Expect compute : ");
+    LogData<T>(outputs[0]->data_c(), 10, "OpenCL compute : ");
+  }
   bool cmp = DataCompare(data_out_cpu, data_out_ocl, element_num);
   MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!");
   EXPECT_EQ(true, cmp);
@@ -223,7 +226,6 @@ static void TestCase(const std::vector<int> &shape_a, const std::vector<int> &sh
   delete[] data_out_ocl;
 
   delete kernel;
-  delete param;
   for (auto tensor : inputs) {
     delete tensor;
   }