From 2a56bea0e58878a9240df37f415770ccda7d1363 Mon Sep 17 00:00:00 2001 From: Corleone Date: Fri, 18 Sep 2020 10:31:54 +0800 Subject: [PATCH] add arithmetic and scale opencl ci testcase --- .../runtime/kernel/opencl/cl/arithmetic.cl | 28 +++++++++---------- .../src/runtime/kernel/opencl/kernel/scale.cc | 25 ++++++----------- mindspore/lite/test/run_test.sh | 4 +-- .../runtime/kernel/opencl/arithmetic_tests.cc | 16 ++++++----- .../src/runtime/kernel/opencl/scale_tests.cc | 18 ++++++------ 5 files changed, 44 insertions(+), 47 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl index e01d0cdd4d..30357ba516 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/arithmetic.cl @@ -116,7 +116,7 @@ __kernel void ElementFloorDiv_IMG(__read_only image2d_t input_a, __read_only ima FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), floor(a / b)); + WRITE_IMAGE(output, (int2)(X, Y), floor(divide_no_check(a, b))); } __kernel void ElementFloorMod_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b, @@ -155,7 +155,7 @@ __kernel void ElementEqual_IMG(__read_only image2d_t input_a, __read_only image2 FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a == b)); + WRITE_IMAGE(output, (int2)(X, Y), a == b ? (FLT4)1.f : (FLT4).0f); } __kernel void ElementNotEqual_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b, @@ -168,7 +168,7 @@ __kernel void ElementNotEqual_IMG(__read_only image2d_t input_a, __read_only ima FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a != b)); + WRITE_IMAGE(output, (int2)(X, Y), a != b ? (FLT4)1.f : (FLT4).0f); } __kernel void ElementLess_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b, @@ -181,7 +181,7 @@ __kernel void ElementLess_IMG(__read_only image2d_t input_a, __read_only image2d FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a < b)); + WRITE_IMAGE(output, (int2)(X, Y), a < b ? (FLT4)1.f : (FLT4).0f); } __kernel void ElementLessEqual_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b, @@ -194,7 +194,7 @@ __kernel void ElementLessEqual_IMG(__read_only image2d_t input_a, __read_only im FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a <= b)); + WRITE_IMAGE(output, (int2)(X, Y), a <= b ? (FLT4)1.f : (FLT4).0f); } __kernel void ElementGreater_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b, @@ -207,7 +207,7 @@ __kernel void ElementGreater_IMG(__read_only image2d_t input_a, __read_only imag FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a > b)); + WRITE_IMAGE(output, (int2)(X, Y), a > b ? (FLT4)1.f : (FLT4).0f); } __kernel void ElementGreaterEqual_IMG(__read_only image2d_t input_a, __read_only image2d_t input_b, @@ -220,7 +220,7 @@ __kernel void ElementGreaterEqual_IMG(__read_only image2d_t input_a, __read_only FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); FLT4 b = READ_IMAGE(input_b, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a >= b)); + WRITE_IMAGE(output, (int2)(X, Y), a >= b ? (FLT4)1.f : (FLT4).0f); } __kernel void BroadcastAdd_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output, @@ -327,7 +327,7 @@ __kernel void BroadcastFloorDiv_IMG(__read_only image2d_t input_a, float b, __wr } FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), floor(a / (FLT4)b)); + WRITE_IMAGE(output, (int2)(X, Y), floor(divide_no_check(a, (FLT4)b))); } __kernel void BroadcastFloorMod_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output, @@ -363,7 +363,7 @@ __kernel void BroadcastEqual_IMG(__read_only image2d_t input_a, float b, __write } FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a == (FLT4)b)); + WRITE_IMAGE(output, (int2)(X, Y), a == (FLT4)b ? (FLT4)1.f : (FLT4).0f); } __kernel void BroadcastNotEqual_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output, @@ -375,7 +375,7 @@ __kernel void BroadcastNotEqual_IMG(__read_only image2d_t input_a, float b, __wr } FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a != (FLT4)b)); + WRITE_IMAGE(output, (int2)(X, Y), a != (FLT4)b ? (FLT4)1.f : (FLT4).0f); } __kernel void BroadcastLess_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output, @@ -387,7 +387,7 @@ __kernel void BroadcastLess_IMG(__read_only image2d_t input_a, float b, __write_ } FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a < (FLT4)b)); + WRITE_IMAGE(output, (int2)(X, Y), a < (FLT4)b ? (FLT4)1.f : (FLT4).0f); } __kernel void BroadcastLessEqual_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output, @@ -399,7 +399,7 @@ __kernel void BroadcastLessEqual_IMG(__read_only image2d_t input_a, float b, __w } FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a <= (FLT4)b)); + WRITE_IMAGE(output, (int2)(X, Y), a <= (FLT4)b ? (FLT4)1.f : (FLT4).0f); } __kernel void BroadcastGreater_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output, @@ -411,7 +411,7 @@ __kernel void BroadcastGreater_IMG(__read_only image2d_t input_a, float b, __wri } FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a > (FLT4)b)); + WRITE_IMAGE(output, (int2)(X, Y), a > (FLT4)b ? (FLT4)1.f : (FLT4).0f); } __kernel void BroadcastGreaterEqual_IMG(__read_only image2d_t input_a, float b, __write_only image2d_t output, @@ -423,7 +423,7 @@ __kernel void BroadcastGreaterEqual_IMG(__read_only image2d_t input_a, float b, } FLT4 a = READ_IMAGE(input_a, smp_none, (int2)(X, Y)); - WRITE_IMAGE(output, (int2)(X, Y), AS_FLT4(a >= (FLT4)b)); + WRITE_IMAGE(output, (int2)(X, Y), a >= (FLT4)b ? (FLT4)1.f : (FLT4).0f); } __kernel void ElementAdd_BUF(__global float *input_a, __global float *input_b, __global float *output, diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index c329d4a502..97ccf03f68 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -311,26 +311,19 @@ int ScaleOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); } else { - if (in_tensors_[0]->data_type() == kNumberTypeFloat32) { + if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { float scale = static_cast(in_tensors_[1]->data_c())[0]; float offset = static_cast(in_tensors_[2]->data_c())[0]; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); - } else if (in_tensors_[0]->data_type() == kNumberTypeFloat16) { - if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { - float scale = static_cast(in_tensors_[1]->data_c())[0]; - float offset = static_cast(in_tensors_[2]->data_c())[0]; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); - } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { - float16_t scale = static_cast(in_tensors_[1]->data_c())[0]; - float16_t offset = static_cast(in_tensors_[2]->data_c())[0]; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(scale)); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, Float32ToShort(offset)); - } else { - MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); - return RET_ERROR; - } + } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { + float16_t scale = static_cast(in_tensors_[1]->data_c())[0]; + float16_t offset = static_cast(in_tensors_[2]->data_c())[0]; + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(scale)); + ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(offset)); + } else { + MS_LOG(ERROR) << "Unsupport data type " << in_tensors_[1]->data_type(); + return RET_ERROR; } } ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); diff --git a/mindspore/lite/test/run_test.sh b/mindspore/lite/test/run_test.sh index 3307238195..44ade02040 100755 --- a/mindspore/lite/test/run_test.sh +++ b/mindspore/lite/test/run_test.sh @@ -31,12 +31,10 @@ cp -fr $TEST_DATA_DIR/testPK ./data # for GPU OpenCL ./lite-test --gtest_filter="TestConvolutionOpenCL.simple_test*" - ./lite-test --gtest_filter="TestArithmeticSelfOpenCLCI.ArithmeticSelfRound*" ./lite-test --gtest_filter="TestConcatOpenCLCI.ConcatFp32_2inputforCI*" ./lite-test --gtest_filter="TestSliceOpenCLfp32.Slicefp32CI*" ./lite-test --gtest_filter="TestBatchnormOpenCLCI.Batchnormfp32CI*" - ./lite-test --gtest_filter="TestAvgPoolingOpenCL*" ./lite-test --gtest_filter="TestConv2dTransposeOpenCL*" ./lite-test --gtest_filter="TestMatMulOpenCL.MatMul2D*" @@ -46,3 +44,5 @@ cp -fr $TEST_DATA_DIR/testPK ./data ./lite-test --gtest_filter="TestReshapeOpenCL*" ./lite-test --gtest_filter="TestSoftmaxOpenCL*" ./lite-test --gtest_filter="TestTransposeOpenCL*" +./lite-test --gtest_filter="TestArithmeticOpenCL*" +./lite-test --gtest_filter="TestScaleOpenCL*" diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc index 63bb2022b2..1408a50d8f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc @@ -67,6 +67,7 @@ static void LogData(void *data, const int size, const std::string prefix) { template static void TestCase(const std::vector &shape_a, const std::vector &shape_b) { + bool is_log_data = false; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto allocator = ocl_runtime->GetAllocator(); @@ -126,7 +127,7 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh } std::vector outputs = {tensor_c}; - ArithmeticParameter *param = new (std::nothrow) ArithmeticParameter(); + ArithmeticParameter *param = static_cast(malloc(sizeof(ArithmeticParameter))); param->broadcasting_ = is_bias_add; if (param == nullptr) { MS_LOG(ERROR) << "Create parameter failed!"; @@ -156,7 +157,7 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh delete[] data_b; delete[] data_c_cpu; delete[] data_c_ocl; - delete param; + free(param); return; } arith_kernel->Init(); @@ -188,10 +189,12 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh memcpy(data_c_ocl, outputs[0]->data_c(), sizeof(T) * element_num); - LogData(data_a, 10, "Data A : "); - LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : "); - LogData(data_c_cpu, 10, "Expect compute : "); - LogData(outputs[0]->data_c(), 10, "OpenCL compute : "); + if (is_log_data) { + LogData(data_a, 10, "Data A : "); + LogData(data_b, tensor_b->shape().empty() ? 1 : 10, "Data B : "); + LogData(data_c_cpu, 10, "Expect compute : "); + LogData(outputs[0]->data_c(), 10, "OpenCL compute : "); + } bool cmp = DataCompare(data_c_cpu, data_c_ocl, element_num); MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); EXPECT_EQ(true, cmp); @@ -203,7 +206,6 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh delete[] data_c_ocl; delete kernel; - delete param; for (auto tensor : inputs) { delete tensor; } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc index 8224918397..1875aec574 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc @@ -67,6 +67,7 @@ static void LogData(void *data, const int size, const std::string prefix) { template static void TestCase(const std::vector &shape_a, const std::vector &shape_b) { + bool is_log_data = false; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto allocator = ocl_runtime->GetAllocator(); @@ -137,7 +138,7 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh } std::vector outputs = {tensor_out}; - ScaleParameter *param = new (std::nothrow) ScaleParameter(); + ScaleParameter *param = static_cast(malloc(sizeof(ScaleParameter))); if (param == nullptr) { MS_LOG(ERROR) << "Create parameter failed!"; delete tensor_in; @@ -170,7 +171,7 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh delete[] data_offset; delete[] data_out_cpu; delete[] data_out_ocl; - delete param; + free(param); return; } scale_kernel->Init(); @@ -206,11 +207,13 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh memcpy(data_out_ocl, outputs[0]->data_c(), sizeof(T) * element_num); - LogData(data_in, 10, "Data input : "); - LogData(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : "); - LogData(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : "); - LogData(data_out_cpu, 10, "Expect compute : "); - LogData(outputs[0]->data_c(), 10, "OpenCL compute : "); + if (is_log_data) { + LogData(data_in, 10, "Data input : "); + LogData(data_scale, tensor_scale->shape().empty() ? 1 : 10, "Data scale : "); + LogData(data_offset, tensor_offset->shape().empty() ? 1 : 10, "Data offset : "); + LogData(data_out_cpu, 10, "Expect compute : "); + LogData(outputs[0]->data_c(), 10, "OpenCL compute : "); + } bool cmp = DataCompare(data_out_cpu, data_out_ocl, element_num); MS_LOG(INFO) << "Compare " << (cmp ? "success!" : "failed!"); EXPECT_EQ(true, cmp); @@ -223,7 +226,6 @@ static void TestCase(const std::vector &shape_a, const std::vector &sh delete[] data_out_ocl; delete kernel; - delete param; for (auto tensor : inputs) { delete tensor; }