diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl index 6266d14d3c..2a688df136 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/activation.cl @@ -91,3 +91,16 @@ __kernel void HSwish(__read_only image2d_t input, __write_only image2d_t output, result.w = temp.w * (temp.w <= -3 ? 0 : (temp.w >= 3 ? 1 : temp.w / 6 + 0.5f)); WRITE_IMAGE(output, (int2)(X, Y), result); } + +__kernel void HSigmoid(__read_only image2d_t input, __write_only image2d_t output, const int2 img_shape) { + int X = get_global_id(0); // w*c + int Y = get_global_id(1); // n*h + if (X >= img_shape.x || Y >= img_shape.y) return; + FLT4 temp = READ_IMAGE(input, smp_zero, (int2)(X, Y)); + FLT4 result = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); + result.x = temp.x <= -3 ? 0 : (temp.x >= 3 ? 1 : temp.x / 6 + 0.5f); + result.y = temp.y <= -3 ? 0 : (temp.y >= 3 ? 1 : temp.y / 6 + 0.5f); + result.z = temp.z <= -3 ? 0 : (temp.z >= 3 ? 1 : temp.z / 6 + 0.5f); + result.w = temp.w <= -3 ? 0 : (temp.w >= 3 ? 
1 : temp.w / 6 + 0.5f); + WRITE_IMAGE(output, (int2)(X, Y), result); +} diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/convolution.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl similarity index 92% rename from mindspore/lite/src/runtime/kernel/opencl/cl/convolution.cl rename to mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl index cbed06393a..cdf8e06205 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/convolution.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/conv2d.cl @@ -28,9 +28,9 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP return; \ } -__kernel void Convolution_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, - __global FLT4 *bias, const int4 input_shape, const int4 output_shape, - const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { +__kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, + __global FLT4 *bias, const int4 input_shape, const int4 output_shape, + const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { const int BlockH = 1; const int BlockW = 1; const int BlockC = 1; @@ -84,9 +84,9 @@ __kernel void Convolution_H1W1C1(__read_only image2d_t input, __write_only image } } -__kernel void Convolution_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, - __global FLT4 *bias, const int4 input_shape, const int4 output_shape, - const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { +__kernel void Conv2D_H2W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, + __global FLT4 *bias, const int4 input_shape, const int4 output_shape, + const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { const int BlockH = 2; const int BlockW = 1; const int BlockC = 1; @@ -161,9 +161,9 @@ __kernel void 
Convolution_H2W1C1(__read_only image2d_t input, __write_only image } } -__kernel void Convolution_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, - __global FLT4 *bias, const int4 input_shape, const int4 output_shape, - const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { +__kernel void Conv2D_H2W1C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, + __global FLT4 *bias, const int4 input_shape, const int4 output_shape, + const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { const int BlockH = 2; const int BlockW = 1; const int BlockC = 2; @@ -268,9 +268,9 @@ __kernel void Convolution_H2W1C2(__read_only image2d_t input, __write_only image } } -__kernel void Convolution_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, - __global FLT4 *bias, const int4 input_shape, const int4 output_shape, - const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { +__kernel void Conv2D_H2W2C2(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight, + __global FLT4 *bias, const int4 input_shape, const int4 output_shape, + const int4 kernel_stride, const int4 pad, const int2 dilation, const int act_type) { const int BlockH = 2; const int BlockW = 2; const int BlockC = 2; diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl index cfd4c7d1ca..3ac91e2df0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/gather.cl @@ -1,7 +1,9 @@ #pragma OPENCL EXTENSION cl_khr_fp16 : enable + __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; -__kernel void gather_NHWC4(__write_only image2d_t dst_data, __read_only image2d_t src_data, __global int *indices, - int4 src_size, int4 dst_size, int 
indices_num, int axis) { + +__kernel void gather(__write_only image2d_t dst_data, __read_only image2d_t src_data, __global int *indices, + int4 src_size, int4 dst_size, int indices_num, int axis) { int X = get_global_id(0); // w int Y = get_global_id(1); // n*h int Z = get_global_id(2); // c @@ -40,48 +42,3 @@ __kernel void gather_NHWC4(__write_only image2d_t dst_data, __read_only image2d_ } WRITE_IMAGE(dst_data, (int2)(X * dst_size.z + Z, batch * dst_size.y + height), res_data); } - -__kernel void gather_NC4HW4(__write_only image2d_t dst_data, __read_only image2d_t src_data, __global int *indices, - int4 src_size, int4 dst_size, int indices_num, int axis) { - int X = get_global_id(0); // w - int Y = get_global_id(1); // n*h - int Z = get_global_id(2); // c - if (X >= dst_size.x || Y >= dst_size.y * dst_size.w || Z >= dst_size.z || dst_size.y == 0) { - return; - } - FLT4 res_data = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); - int batch = Y / dst_size.y; - int height = Y % dst_size.y; - if (axis == 0) { - int index_y = indices[batch] * src_size.y * src_size.z + Z * src_size.y + height; - res_data = READ_IMAGE(src_data, smp_zero, (int2)(X, index_y)); - } else if (axis == 1) { - int index_y = batch * src_size.y * src_size.z + Z * src_size.y + indices[height]; - res_data = READ_IMAGE(src_data, smp_zero, (int2)(X, index_y)); - } else if (axis == 2) { - int index_y = batch * src_size.y * src_size.z + Z * src_size.y + height; - res_data = READ_IMAGE(src_data, smp_zero, (int2)(indices[X], index_y)); - } else if (axis == 3) { - int offset[4] = {indices[Z * 4] / 4, indices[Z * 4 + 1] / 4, indices[Z * 4 + 2] / 4, indices[Z * 4 + 3] / 4}; - FLT tmp[4]; - FLT res_tmp[4]; - for (int i = 0; i < indices_num; ++i) { - FLT4 rd_data = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); - int index_y = batch * src_size.y * src_size.z + offset[i] * src_size.y + height; - rd_data = READ_IMAGE(src_data, smp_zero, (int2)(X, index_y)); - if (i >= 1 && offset[i] != offset[i - 1]) { - rd_data = READ_IMAGE(src_data, 
smp_zero, (int2)(X, index_y)); - } - tmp[0] = rd_data.x; - tmp[1] = rd_data.y; - tmp[2] = rd_data.z; - tmp[3] = rd_data.w; - res_tmp[i] = tmp[indices[Z * 4 + i] % 4]; - } - res_data.x = res_tmp[0]; - res_data.y = res_tmp[1]; - res_data.z = res_tmp[2]; - res_data.w = res_tmp[3]; - } - WRITE_IMAGE(dst_data, (int2)(X, (batch * dst_size.y * dst_size.z + Z * dst_size.y + height)), res_data); -} diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/hswish.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/hswish.cl deleted file mode 100644 index fa7486bb63..0000000000 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/hswish.cl +++ /dev/null @@ -1,19 +0,0 @@ -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; -__kernel void hswish(__read_only image2d_t src_data, __write_only image2d_t dst_data, int4 tensor_shape) { - int X = get_global_id(0); // n*h n: default =1 - int Y = get_global_id(1); // w - int Z = get_global_id(2); // c - if (X >= tensor_shape.x * tensor_shape.y || Y >= tensor_shape.z || Z >= tensor_shape.w || tensor_shape.y == 0) { - return; - } - int n = X / tensor_shape.y; - int h = X % tensor_shape.y; - FLT4 temp = READ_IMAGE(src_data, smp_none, (int2)((Y)*tensor_shape.w + Z, (n * tensor_shape.y + h))); - FLT4 result = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); - result.x = temp.x <= -3 ? 0 : (temp.x >= 3 ? 1 : temp.x / 6 + 0.5f); - result.y = temp.y <= -3 ? 0 : (temp.y >= 3 ? 1 : temp.y / 6 + 0.5f); - result.z = temp.z <= -3 ? 0 : (temp.z >= 3 ? 1 : temp.z / 6 + 0.5f); - result.w = temp.w <= -3 ? 0 : (temp.w >= 3 ? 
1 : temp.w / 6 + 0.5f); - WRITE_IMAGE(dst_data, (int2)((Y)*tensor_shape.w + Z, (n * tensor_shape.y + h)), result); -} diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/pad.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/pad.cl index e979b0ba45..7eba6bf70d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/pad.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/pad.cl @@ -2,35 +2,56 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; -#define Pad(dataformat, in_x, in_y, out_x, out_y) \ - __kernel void Pad_##dataformat(__read_only image2d_t input, __write_only image2d_t output, int4 input_shape, \ - int4 output_shape, int2 pad, float constant_value) { \ - int oh = get_global_id(0); \ - int ow = get_global_id(1); \ - int co_slice = get_global_id(2); \ - int OH = output_shape.y; \ - int OW = output_shape.z; \ - int CO_SLICES = output_shape.w; \ - \ - if (oh >= OH || ow >= OW || co_slice >= CO_SLICES) { \ - return; \ - } \ - \ - int IH = input_shape.y; \ - int IW = input_shape.z; \ - int CI_SLICES = input_shape.w; \ - \ - int pad_top = pad.x; \ - int pad_left = pad.y; \ - int ih = oh - pad_top; \ - int iw = ow - pad_left; \ - \ - FLT4 result = (FLT4)(constant_value); \ - if (ih >= 0 && ih < IH && iw >= 0 && iw < IW) { \ - result = READ_IMAGE(input, smp_zero, (int2)(in_x, in_y)); \ - } \ - WRITE_IMAGE(output, (int2)(out_x, out_y), result); \ +__kernel void Pad(__read_only image2d_t input, __write_only image2d_t output, int4 input_shape, int4 output_shape, + int2 io_slices, int4 pad_before, float constant_value) { + int IN = input_shape.x, IH = input_shape.y, IW = input_shape.z, CI = input_shape.w; + int ON = output_shape.x, OH = output_shape.y, OW = output_shape.z, CO = output_shape.w; + int CI_SLICES = io_slices.x, CO_SLICES = io_slices.y; + int on_oh = get_global_id(0); + int ow = get_global_id(1); + int co_slice = get_global_id(2); + int on = on_oh / OH; + int oh = on_oh % OH; + if (on >= ON || oh >= 
OH || ow >= OW || co_slice >= CO_SLICES) { + return; } -Pad(NHWC4, iw *CI_SLICES + co_slice, ih, ow *CO_SLICES + co_slice, oh); -Pad(NC4HW4, iw, co_slice *IH + ih, ow, co_slice *OH + oh); + int in = on - pad_before.x; + int ih = oh - pad_before.y; + int iw = ow - pad_before.z; + int ci = co_slice * 4 - pad_before.w; + if (in < 0 || in >= IN || ih < 0 || ih >= IH || iw < 0 || iw >= IW || ci + 3 < 0 || ci >= CI) { + WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, on_oh), (FLT4)(constant_value)); + return; + } + + int offset = ci % 4; + if (offset < 0) { + offset += 4; + } + FLT4 src0 = READ_IMAGE(input, smp_zero, (int2)(iw * CI_SLICES + ci / 4, in * IH + ih)); + if (offset == 0 && ci >= 0 && ci + 3 < CI) { + WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, on_oh), src0); + return; + } + FLT4 src1 = READ_IMAGE(input, smp_zero, (int2)(iw * CI_SLICES + (ci + 4) / 4, in * IH + ih)); + FLT4 src_f4; + if (offset == 0) { + src_f4 = (FLT4)(src0.x, src0.y, src0.z, src0.w); + } else if (offset == 1) { + src_f4 = (FLT4)(src0.y, src0.z, src0.w, src1.x); + } else if (offset == 2) { + src_f4 = (FLT4)(src0.z, src0.w, src1.x, src1.y); + } else { // if (offset==3) + src_f4 = (FLT4)(src0.w, src1.x, src1.y, src1.z); + } + FLT src[4] = {src_f4.x, src_f4.y, src_f4.z, src_f4.w}; + FLT out[4] = {constant_value, constant_value, constant_value, constant_value}; + for (int i = 0; i < 4; ++i) { + if (ci + i >= 0 && ci + i < CI) { + out[i] = src[i]; + } + } + FLT4 out_f4 = (FLT4)(out[0], out[1], out[2], out[3]); + WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, on_oh), out_f4); +} diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index e3fa1cc1c1..49d267ae81 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -31,6 +31,7 @@ using mindspore::kernel::KERNEL_ARCH::kGPU; using 
mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; +using mindspore::schema::ActivationType_HSIGMOID; using mindspore::schema::ActivationType_HSWISH; using mindspore::schema::ActivationType_LEAKY_RELU; using mindspore::schema::ActivationType_RELU; @@ -44,9 +45,9 @@ namespace mindspore::kernel { std::string ActivationOpenCLKernel::GetActTypeString(int act_type) { static std::map supported_act_type = { - {ActivationType_LEAKY_RELU, "LeakyRelu"}, {ActivationType_RELU, "Relu"}, {ActivationType_SIGMOID, "Sigmoid"}, - {ActivationType_RELU6, "Relu6"}, {ActivationType_TANH, "Tanh"}, {ActivationType_SWISH, "Swish"}, - {ActivationType_HSWISH, "HSwish"}}; + {ActivationType_LEAKY_RELU, "LeakyRelu"}, {ActivationType_RELU, "Relu"}, {ActivationType_SIGMOID, "Sigmoid"}, + {ActivationType_RELU6, "Relu6"}, {ActivationType_TANH, "Tanh"}, {ActivationType_SWISH, "Swish"}, + {ActivationType_HSWISH, "HSwish"}, {ActivationType_HSIGMOID, "HSigmoid"}}; auto result_iter = supported_act_type.find(act_type); if (result_iter != supported_act_type.end()) { return result_iter->second; @@ -63,13 +64,12 @@ int ActivationOpenCLKernel::CheckSpecs() { } int ActivationOpenCLKernel::Prepare() { - outShape = Image2DInfo(out_tensors_[0]); + outShape = GpuTensorInfo(out_tensors_[0]); std::string source = activation_source; - std::set build_options; std::string program_name = "Activation"; ocl_runtime_->LoadSource(program_name, source); std::string kernel_name = GetActTypeString(type_); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); SetConstArgs(); SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " init Done!"; @@ -101,7 +101,7 @@ int ActivationOpenCLKernel::Run() { int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - auto ret = 
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); if (ret != RET_OK) { MS_LOG(ERROR) << "Run kernel:" << this->name() << " fail."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h index 6c0b4635fc..5abac64eba 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h @@ -45,7 +45,7 @@ class ActivationOpenCLKernel : public OpenCLKernel { cl::Kernel kernel_; int type_; float alpha_; - Image2DInfo outShape = Image2DInfo(nullptr); + GpuTensorInfo outShape = GpuTensorInfo(nullptr); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc index ab53aeb196..8939f9955d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc @@ -76,7 +76,7 @@ void ArgMinMaxOpenCLKernel::SetGlobalLocal() { auto in_shape = in_tensors_[0]->shape(); auto in_shape_align = in_shape; in_shape_align[3] = UP_ROUND(in_shape[3], C4NUM); - im_in_ = Image2DInfo(in_tensors_[0]); + im_in_ = GpuTensorInfo(in_tensors_[0]); auto out_shape_align = in_shape_align; out_shape_align.at(param->axis_) = param->axis_ == 3 ? UP_ROUND(param->topk_, C4NUM) : param->topk_; int reduce_len = GetUpPow2(in_shape.at(param->axis_)); @@ -152,8 +152,7 @@ int ArgMinMaxOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF); ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); - + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h index 80910d3c12..bf6aa1428e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h @@ -43,7 +43,7 @@ class ArgMinMaxOpenCLKernel : public OpenCLKernel { cl::Kernel kernel_; void *buff_{nullptr}; void *ids_{nullptr}; - Image2DInfo im_in_{Image2DInfo(nullptr)}; + GpuTensorInfo im_in_{GpuTensorInfo(nullptr)}; cl_int4 src_size_; cl_int4 cus_size_; cl_int4 strides_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 859aafa2b2..fff321f789 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -245,10 +245,9 @@ int ArithmeticOpenCLKernel::Prepare() { kernel_name_ += "_BUF"; } std::string program_name = "Arithmetic"; - std::set build_options; std::string source = arithmetic_source; ocl_runtime_->LoadSource(program_name, source); - error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name_, build_options); + error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name_); #endif if (error_code != RET_OK) { return error_code; @@ -270,7 +269,7 @@ int ArithmeticOpenCLKernel::Run() { auto input_1_ptr = inputs_weight_ptrs_[1] == nullptr ? 
in_tensors_[1]->data_c() : inputs_weight_ptrs_[1]; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 30d13d9a2f..2a711722d1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -157,8 +157,7 @@ int ArithmeticSelfOpenCLKernel::Run() { int arg_cn = 0; ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); - + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc index a8720ad60f..f79c4b08c9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc @@ -87,11 +87,10 @@ int BatchToSpaceNDOpenCLKernel::Prepare() { kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string source = batch_to_space_nd_source; std::string program_name = "batch_to_space_nd"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetGlobalLocal(); @@ -102,9 +101,9 @@ int BatchToSpaceNDOpenCLKernel::Prepare() { int BatchToSpaceNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " 
Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index dd1d4d8b58..7231431e54 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -91,8 +91,7 @@ int BatchNormOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[3]->data_c()); // mean ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[4]->data_c()); // variance ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); - + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc index 48ec0d9c94..a48949780b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/biasadd.cc @@ -71,12 +71,11 @@ int BiasAddOpenCLKernel::Init() { return mindspore::lite::RET_ERROR; } InitWeights(); - std::set build_options; std::string source = biasadd_source; std::string program_name = "BiasAdd"; std::string kernel_name = "BiasAdd"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, 
kernel_name); MS_LOG(DEBUG) << program_name << " Init Done!"; return mindspore::lite::RET_OK; @@ -95,7 +94,7 @@ int BiasAddOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, data_type[schema::Format::Format_NHWC4]); std::vector local = {1, 1}; std::vector global = {static_cast(global_size.s[1]), static_cast(global_size.s[2])}; - auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); + auto ret = ocl_runtime_->RunKernel(kernel_, global, local); if (ret != mindspore::lite::RET_OK) { MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; return mindspore::lite::RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc index 9b5a0b2fa8..65f94248b9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc @@ -100,8 +100,7 @@ int CastOpenCLKernel::Run() { int arg_cn = 0; ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); - + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index 267fabfed9..9c8d3a12b5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -114,11 +114,10 @@ int ConcatOpenCLKernel::Prepare() { } kernel_name += "_NHWC4"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; - std::set build_options; std::string source = concat_source; std::string program_name = "Concat"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + 
ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); MS_LOG(DEBUG) << kernel_name << " Init Done!"; SetGlobalLocal(); SetConstArgs(); @@ -146,7 +145,7 @@ int ConcatOpenCLKernel::Run() { MS_LOG(ERROR) << "unsupported input size :" << in_tensors_.size(); return RET_ERROR; } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc similarity index 63% rename from mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc rename to mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc index 3909b57589..a4eca9d3cd 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc @@ -18,12 +18,12 @@ #include #include #include "src/common/utils.h" -#include "src/runtime/kernel/opencl/kernel/convolution.h" +#include "src/runtime/kernel/opencl/kernel/conv2d.h" #include "src/runtime/kernel/opencl/kernel/fullconnection.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/kernel_registry.h" #include "include/errorcode.h" -#include "src/runtime/kernel/opencl/cl/convolution.cl.inc" +#include "src/runtime/kernel/opencl/cl/conv2d.cl.inc" #include "src/runtime/kernel/opencl/cl/winograd.cl.inc" using mindspore::kernel::KERNEL_ARCH::kGPU; @@ -38,19 +38,43 @@ namespace mindspore::kernel { constexpr size_t CI_TILE = C4NUM; constexpr size_t CO_TILE = C4NUM; -int ConvolutionOpenCLKernel::Init() { +int Conv2DOpenCLKernel::CheckSpecs() { + if (in_tensors_.size() != 2 && in_tensors_.size() != 3) { + MS_LOG(ERROR) << "Conv2D only supports 2 or 3 input Tensor but get " << in_tensors_.size(); + return RET_ERROR; + } + if (out_tensors_.size() != 1) { + MS_LOG(ERROR) << "Conv2D only supports 1 output Tensor but get " << out_tensors_.size(); + return RET_ERROR; + } + 
if (in_tensors_.front()->shape().size() != 4) { + MS_LOG(ERROR) << "Conv2D only supports 4D input Tensor but get " << in_tensors_.front()->shape().size() << "D."; + return RET_ERROR; + } + if (in_tensors_[1]->shape().size() != 4) { + MS_LOG(ERROR) << "Conv2D only supports 4D filter Tensor but get " << in_tensors_[1]->shape().size() << "D."; + return RET_ERROR; + } + if (out_tensors_.front()->shape().size() != 4) { + MS_LOG(ERROR) << "Conv2D only supports 4D output Tensor but get " << out_tensors_.front()->shape().size() << "D."; + return RET_ERROR; + } + return RET_OK; +} + +int Conv2DOpenCLKernel::Prepare() { use_fp16_ = ocl_runtime_->GetFp16Enable(); sizeof_FLT_ = use_fp16_ ? sizeof(float16_t) : sizeof(float); - auto input_tensor = in_tensors_[0]; - auto output_tensor = out_tensors_[0]; - batch_size_ = input_tensor->Batch(); - CI_ = input_tensor->Channel(); - IH_ = input_tensor->Height(); - IW_ = input_tensor->Width(); - CO_ = output_tensor->Channel(); - OH_ = output_tensor->Height(); - OW_ = output_tensor->Width(); + auto input_shape = in_tensors_.front()->shape(); + auto output_shape = out_tensors_.front()->shape(); + batch_size_ = input_shape[0]; + IH_ = input_shape[1]; + IW_ = input_shape[2]; + CI_ = input_shape[3]; + OH_ = output_shape[1]; + OW_ = output_shape[2]; + CO_ = output_shape[3]; CI_SLICES_ = UP_DIV(CI_, C4NUM); CO_SLICES_ = UP_DIV(CO_, C4NUM); KH_ = param_->kernel_h_; @@ -63,26 +87,21 @@ int ConvolutionOpenCLKernel::Init() { TILES_XY_ = TILES_X_ * TILES_Y_; use_winograd_ = UseWinograd4x4To6x6(); - if (!use_winograd_) { - SetBlockSize(); - SetGlobalLocal(); - } - // build kernel - std::set build_options; if (use_winograd_) { MS_LOG(DEBUG) << "use winograd"; - std::string program_name = "Winograd"; + std::string program_name = "winograd"; ocl_runtime_->LoadSource(program_name, winograd_source); - ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36", build_options); - ocl_runtime_->BuildKernel(kernel_conv_, program_name, 
"WinogradConvolution", build_options); - ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4", build_options); + ocl_runtime_->BuildKernel(kernel_4x4to36_, program_name, "Winograd4x4To36"); + ocl_runtime_->BuildKernel(kernel_conv_, program_name, "WinogradConvolution"); + ocl_runtime_->BuildKernel(kernel_36to4x4_, program_name, "Winograd36To4x4"); } else { - std::string program_name = "Convolution"; - std::string kernel_name = "Convolution_H" + std::to_string(block_size_.H) + "W" + std::to_string(block_size_.W) + - "C" + std::to_string(block_size_.C); - ocl_runtime_->LoadSource("Convolution", convolution_source); - ocl_runtime_->BuildKernel(kernel_conv_, program_name, kernel_name, build_options); + SetBlockSize(); + std::string program_name = "conv2d"; + std::string kernel_name = "Conv2D_H" + std::to_string(block_size_.H) + "W" + std::to_string(block_size_.W) + "C" + + std::to_string(block_size_.C); + ocl_runtime_->LoadSource(program_name, conv2d_source); + ocl_runtime_->BuildKernel(kernel_conv_, program_name, kernel_name); } // allocate winograd memory @@ -102,12 +121,12 @@ int ConvolutionOpenCLKernel::Init() { } InitWeights(); - - MS_LOG(DEBUG) << "Convolution Init Done!"; + SetGlobalLocal(); + SetConstArgs(); return RET_OK; } -int ConvolutionOpenCLKernel::GenerateWinogradWeight() { +int Conv2DOpenCLKernel::GenerateWinogradFilter() { constexpr float Gt[] = {1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 1.0000000000, 0.0000000000, 0.0000000000, 0.7071067691, -0.7071067691, 1.4142135382, -1.4142135382, 0.0000000000, 0.0000000000, 0.4999999702, 0.4999999702, 1.9999998808, 1.9999998808, 1.0000000000}; @@ -159,7 +178,7 @@ int ConvolutionOpenCLKernel::GenerateWinogradWeight() { return RET_OK; } -int ConvolutionOpenCLKernel::InitWeight() { +int Conv2DOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); // allocate memory @@ -175,7 +194,7 @@ int ConvolutionOpenCLKernel::InitWeight() { // rearrange weight if 
(use_winograd_) { - GenerateWinogradWeight(); + GenerateWinogradFilter(); } else { auto weight_tensor = in_tensors_[1]; if (weight_tensor->data_type() == kNumberTypeFloat16) { @@ -201,7 +220,7 @@ int ConvolutionOpenCLKernel::InitWeight() { return RET_OK; } -int ConvolutionOpenCLKernel::InitBias() { +int Conv2DOpenCLKernel::InitBias() { auto allocator = ocl_runtime_->GetAllocator(); // align bias from C to C4 @@ -236,15 +255,15 @@ int ConvolutionOpenCLKernel::InitBias() { return RET_OK; } -int ConvolutionOpenCLKernel::InitWeights() { - InitWeight(); +int Conv2DOpenCLKernel::InitWeights() { + InitFilter(); if (has_bias_) { InitBias(); } return RET_OK; } -void ConvolutionOpenCLKernel::SetBlockSize() { +void Conv2DOpenCLKernel::SetBlockSize() { auto task_size = static_cast(batch_size_ * OH_ * OW_ * CO_SLICES_); auto task_size_per_cu = task_size / ocl_runtime_->DeviceComputeUnits(); int block_size; @@ -277,35 +296,44 @@ void ConvolutionOpenCLKernel::SetBlockSize() { } } -void ConvolutionOpenCLKernel::SetGlobalLocal() { - size_t global_h = batch_size_ * UP_DIV(OH_, block_size_.H); - size_t global_w = UP_DIV(OW_, block_size_.W); - size_t global_c = UP_DIV(CO_SLICES_, block_size_.C); - - constexpr int local_c_max = 16; - constexpr int local_hw_max = 256; - constexpr int OH_threshold = 100; - constexpr int OW_threshold = 100; - constexpr int OC_threshold = 64; - size_t local_c = GetMaxDivisor(global_c, local_c_max); - local_c = std::max(local_c, 1); - size_t local_hw = local_hw_max / local_c; - size_t local_h; - size_t local_w; - if (OH_ >= OH_threshold && OW_ >= OW_threshold && CO_ <= OC_threshold) { // c -> w -> h - local_w = std::min(global_w, local_hw); - local_h = std::min(local_hw / local_w, global_h); - } else { // c -> h -> w - local_h = std::min(global_h, local_hw); - local_w = std::min(local_hw / local_h, global_w); - } +void AlignWinogradGlobalLocal(const std::vector &global, const std::vector &local, cl::NDRange *global_range, + cl::NDRange *local_range) { + 
*local_range = cl::NDRange(local[0], local[1], local[2]); + *global_range = + cl::NDRange(UP_ROUND(global[0], local[0]), UP_ROUND(global[1], local[1]), UP_ROUND(global[2], local[2])); +} - global_ = {global_h, global_w, global_c}; - local_ = {local_h, local_w, local_c}; +void Conv2DOpenCLKernel::SetGlobalLocal() { + if (use_winograd_) { + AlignWinogradGlobalLocal({TILES_XY_, 6, CI_SLICES_}, {8, 6, 4}, &global_4x4to36_, &local_4x4to36_); + AlignWinogradGlobalLocal({UP_DIV(TILES_XY_, 2), 36, UP_DIV(CO_SLICES_, 2)}, {8, 6, 2}, &global_conv_, &local_conv_); + AlignWinogradGlobalLocal({TILES_XY_, 4, CO_SLICES_}, {32, 4, 2}, &global_36to4x4_, &local_36to4x4_); + } else { + size_t global_h = batch_size_ * UP_DIV(OH_, block_size_.H); + size_t global_w = UP_DIV(OW_, block_size_.W); + size_t global_c = UP_DIV(CO_SLICES_, block_size_.C); + constexpr int local_c_max = 16; + constexpr int local_hw_max = 256; + constexpr int OH_threshold = 100; + constexpr int OW_threshold = 100; + constexpr int OC_threshold = 64; + size_t local_c = GetMaxDivisor(global_c, local_c_max); + local_c = std::max(local_c, 1); + size_t local_hw = local_hw_max / local_c; + size_t local_h; + size_t local_w; + if (OH_ >= OH_threshold && OW_ >= OW_threshold && CO_ <= OC_threshold) { // c -> w -> h + local_w = std::min(global_w, local_hw); + local_h = std::min(local_hw / local_w, global_h); + } else { // c -> h -> w + local_h = std::min(global_h, local_hw); + local_w = std::min(local_hw / local_h, global_w); + } + AlignGlobalLocal({global_h, global_w, global_c}, {local_h, local_w, local_c}); + } } -int ConvolutionOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; +void Conv2DOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(op_parameter_); cl_int act_type = 0; if (param->act_type_ == ActType_Relu) { @@ -318,37 +346,33 @@ int ConvolutionOpenCLKernel::Run() { int arg_cn; if (use_winograd_) { - arg_cn = 0; + arg_cn = 1; cl_int4 _4x4to36_out_shape = {1, 36, TILES_XY_, CI_SLICES_}; 
- ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_); ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, _4x4to36_out_shape); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, _4x4to36_out_shape); arg_cn = 0; cl_int4 conv_in_shape = {1, 36, TILES_XY_, CI_SLICES_}; cl_int4 conv_out_shape = {1, 36, TILES_XY_, CO_SLICES_}; - ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_, lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem0_); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, winograd_mem1_); ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_in_shape); - ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, conv_out_shape); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn, conv_out_shape); - arg_cn = 0; + arg_cn = 2; cl_int4 _36to4x4_in_shape = {1, 16, TILES_XY_, CO_SLICES_}; - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, winograd_mem1_, lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_); ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, _36to4x4_in_shape); ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, act_type); + ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, 
act_type); } else { - arg_cn = 0; + arg_cn = 2; cl_int4 kernel_stride = {KH_, KW_, param->stride_h_, param->stride_w_}; cl_int4 pad = {param->pad_u_, param->pad_d_, param->pad_l_, param->pad_r_}; cl_int2 dilation = {param->dilation_h_, param->dilation_w_}; - ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_weight_, lite::opencl::MemType::BUF); ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, packed_bias_, lite::opencl::MemType::BUF); ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, input_shape); @@ -356,71 +380,86 @@ int ConvolutionOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, kernel_stride); ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, pad); ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, dilation); - ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn++, act_type); + ocl_runtime_->SetKernelArg(kernel_conv_, arg_cn, act_type); } +} +int Conv2DOpenCLKernel::Run() { if (use_winograd_) { - ocl_runtime_->RunKernel(kernel_4x4to36_, std::vector({size_t(TILES_XY_), 6, size_t(CI_SLICES_)}), - std::vector({8, 6, 4}), nullptr); - ocl_runtime_->RunKernel(kernel_conv_, - std::vector({size_t(UP_DIV(TILES_XY_, 2)), 36, size_t(UP_DIV(CO_SLICES_, 2))}), - std::vector({8, 6, 2}), nullptr); - ocl_runtime_->RunKernel(kernel_36to4x4_, std::vector({size_t(TILES_XY_), 4, size_t(CO_SLICES_)}), - std::vector({32, 4, 2}), nullptr); + ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_); + + ocl_runtime_->RunKernel(kernel_conv_, global_conv_, local_conv_); + + ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_); } else { - 
ocl_runtime_->RunKernel(kernel_conv_, global_, local_, nullptr); + ocl_runtime_->SetKernelArg(kernel_conv_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_conv_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_conv_, global_range_, local_range_); } - return RET_OK; } +bool UseFcReplaceConv(const std::vector &inputs, const std::vector &outputs, + ConvParameter *param) { + auto input_shape = inputs.front()->shape(); + auto output_shape = inputs.front()->shape(); + // IH=1 IW=1 OH=1 OW=1 + bool hw_is_1 = input_shape.size() == 4 && input_shape[1] == 1 && input_shape[2] == 1 && output_shape.size() == 4 && + output_shape[1] == 1 && output_shape[2] == 1; + bool attr_valid = param->kernel_h_ == 1 && param->kernel_w_ == 1 && param->stride_h_ == 1 && param->stride_w_ == 1 && + param->pad_u_ == 0 && param->pad_d_ == 0 && param->pad_l_ == 0 && param->pad_r_ == 0 && + param->dilation_h_ == 1 && param->dilation_w_ == 1; + return hw_is_1 && attr_valid; +} + +OpParameter *CreateFcParam(const ConvParameter *conv_param) { + auto fc_param = static_cast(malloc(sizeof(MatMulParameter))); + if (fc_param == nullptr) { + MS_LOG(ERROR) << "Create FullConnection kernel param failed."; + return nullptr; + } + fc_param->op_parameter_.type_ = PrimitiveType_FullConnection; + fc_param->a_transpose_ = false; + fc_param->b_transpose_ = true; + fc_param->act_type_ = conv_param->act_type_; + return reinterpret_cast(fc_param); +} + kernel::LiteKernel *OpenCLConvolutionKernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) { - kernel::LiteKernel *kernel; - bool is_hw1 = inputs[0]->shape().size() == 4 && inputs[0]->shape()[1] == 1 && inputs[0]->shape()[2] == 1 && - outputs[0]->shape().size() == 4 && outputs[0]->shape()[1] == 1 && outputs[0]->shape()[2] == 1; - auto conv_param = reinterpret_cast(opParameter); - 
bool is_pad_stride_ok = conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1 && conv_param->stride_h_ == 1 && - conv_param->stride_w_ == 1 && conv_param->pad_u_ == 0 && conv_param->pad_d_ == 0 && - conv_param->pad_l_ == 0 && conv_param->pad_r_ == 0 && conv_param->dilation_h_ == 1 && - conv_param->dilation_w_ == 1; - + kernel::OpenCLKernel *kernel; OpParameter *real_param; - if (is_hw1 && is_pad_stride_ok) { - auto fc_param = static_cast(malloc(sizeof(MatMulParameter))); - if (fc_param == nullptr) { - MS_LOG(ERROR) << "Create OpenCL FullConnection kernel param failed!"; - return nullptr; - } - fc_param->op_parameter_.type_ = PrimitiveType_FullConnection; - fc_param->a_transpose_ = false; - fc_param->b_transpose_ = true; - fc_param->act_type_ = conv_param->act_type_; - kernel = new (std::nothrow) FullConnectionOpenCLKernel(reinterpret_cast(fc_param), inputs, outputs); - real_param = reinterpret_cast(fc_param); + auto *conv_param = reinterpret_cast(opParameter); + if (UseFcReplaceConv(inputs, outputs, conv_param)) { + auto *fc_param = CreateFcParam(conv_param); + kernel = new (std::nothrow) FullConnectionOpenCLKernel(fc_param, inputs, outputs); + real_param = fc_param; if (kernel == nullptr) { - MS_LOG(ERROR) << "Create OpenCL FullConnection kernel failed!"; + MS_LOG(ERROR) << "Create FullConnection kernel failed."; free(fc_param); free(conv_param); return nullptr; } else { free(conv_param); + MS_LOG(INFO) << "use FullConnection to replace Convolution."; } } else { - kernel = new (std::nothrow) ConvolutionOpenCLKernel(reinterpret_cast(conv_param), inputs, outputs); + kernel = new (std::nothrow) Conv2DOpenCLKernel(reinterpret_cast(conv_param), inputs, outputs); real_param = reinterpret_cast(conv_param); if (kernel == nullptr) { - MS_LOG(ERROR) << "Create OpenCL Convolution kernel failed!"; + MS_LOG(ERROR) << "Create Convolution kernel failed."; free(conv_param); return nullptr; } } - auto ret = kernel->Init(); + int ret = kernel->CheckSpecs(); if (ret != 
mindspore::lite::RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: Convolution"; + MS_LOG(ERROR) << "Init Convolution kernel failed."; delete kernel; free(real_param); return nullptr; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h similarity index 76% rename from mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h rename to mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h index a76386d05c..769bab3d92 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CONVOLUTION_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CONVOLUTION_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CONV2D_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CONV2D_H_ #include #include @@ -27,23 +27,27 @@ namespace mindspore::kernel { -class ConvolutionOpenCLKernel : public OpenCLKernel { +class Conv2DOpenCLKernel : public OpenCLKernel { public: - ConvolutionOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + Conv2DOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs), param_(reinterpret_cast(parameter)) {} - ~ConvolutionOpenCLKernel() override = default; + ~Conv2DOpenCLKernel() override = default; - int Init() override; - int Run() override; - int InitWeights() override; + int CheckSpecs() override; + + int Prepare() override; void SetGlobalLocal() override; + int InitWeights() override; + void SetConstArgs() override; + + int Run() override; private: void SetBlockSize(); - int InitWeight(); + int InitFilter(); int InitBias(); - int GenerateWinogradWeight(); + int GenerateWinogradFilter(); bool UseWinograd4x4To6x6() { const bool 
attr_valid = param_->kernel_h_ == 3 && param_->kernel_w_ == 3 && param_->stride_h_ == 1 && @@ -58,8 +62,9 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { cl::Kernel kernel_4x4to36_; cl::Kernel kernel_conv_; cl::Kernel kernel_36to4x4_; - std::vector global_; - std::vector local_; + cl::NDRange global_4x4to36_, local_4x4to36_; + cl::NDRange global_conv_, local_conv_; + cl::NDRange global_36to4x4_, local_36to4x4_; bool use_fp16_{false}; size_t sizeof_FLT_{4}; @@ -95,4 +100,4 @@ class ConvolutionOpenCLKernel : public OpenCLKernel { }; } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CONVOLUTION_H_ +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_CONV2D_H_ diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index efc5e71ea4..8879fdea41 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -48,10 +48,9 @@ int Conv2dTransposeOpenCLKernel::Prepare() { kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::string source = conv2d_transpose_source; - std::set build_options; std::string program_name = "conv2d_transpose"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif InitWeights(); SetGlobalLocal(); @@ -194,7 +193,7 @@ int Conv2dTransposeOpenCLKernel::Run() { int arg_cnt = 0; ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 82289cabb0..fdc08ecb62 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -69,10 +69,9 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::string program_name = "DepthwiseConv2d"; - std::set build_options; std::string source = depthwise_conv2d_source; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif InitWeights(); SetGlobalLocal(); @@ -193,7 +192,7 @@ int DepthwiseConv2dOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index b2edf58f90..bb6d4432ec 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -71,16 +71,15 @@ int FullConnectionOpenCLKernel::CheckSpecs() { int FullConnectionOpenCLKernel::Prepare() { std::string kernel_name = "FullConnection_NHWC4"; - inShape = Image2DInfo(in_tensors_[0]); - outShape = Image2DInfo(out_tensors_[0]); + inShape = GpuTensorInfo(in_tensors_[0]); + outShape = GpuTensorInfo(out_tensors_[0]); #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; 
std::string source = fullconnection_source; std::string program_name = "FullConnection"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif InitWeights(); SetConstArgs(); @@ -203,7 +202,7 @@ int FullConnectionOpenCLKernel::Run() { int arg_count = 0; ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h index 1455ff70db..1b2b8556b8 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h @@ -48,8 +48,8 @@ class FullConnectionOpenCLKernel : public OpenCLKernel { bool transposeB{true}; float activation_min_{-FLT_MAX}; float activation_max_{FLT_MAX}; - Image2DInfo inShape = Image2DInfo(nullptr); - Image2DInfo outShape = Image2DInfo(nullptr); + GpuTensorInfo inShape = GpuTensorInfo(nullptr); + GpuTensorInfo outShape = GpuTensorInfo(nullptr); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index 0de2158999..3145e93f0e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -30,47 +30,88 @@ using mindspore::schema::PrimitiveType_Gather; namespace mindspore::kernel { -int GatherOpenCLKernel::CheckSpecs() { return RET_OK; } +int GatherOpenCLKernel::CheckSpecs() { + if (in_tensors_.size() != 2) { + MS_LOG(ERROR) << "GatherOpenCLKernel only supports 
2 input Tensor but get " << in_tensors_.size(); + return RET_ERROR; + } + if (out_tensors_.size() != 1) { + MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1 output Tensor but get " << out_tensors_.size(); + return RET_ERROR; + } + + if (in_tensors_.at(1)->category() == lite::Tensor::VAR) { + MS_LOG(ERROR) << "GatherOpenCLKernel only supports indices Tensor is weight."; + return RET_ERROR; + } + + int input_ndim = in_tensors_.front()->shape().size(); + if (input_ndim < 0 || input_ndim > 4) { + MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1-4D input Tensor but get " << input_ndim << "D."; + return RET_ERROR; + } + int indices_ndim = in_tensors_.at(1)->shape().size(); + if (indices_ndim != 1) { + MS_LOG(ERROR) << "GatherOpenCLKernel only supports 1D indices Tensor but get " << indices_ndim << "D."; + return RET_ERROR; + } + + TypeId data_type = in_tensors_.at(1)->data_type(); + if (data_type != kNumberTypeInt32 && data_type != kNumberTypeInt64 && data_type != kNumberTypeFloat32 && + data_type != kNumberTypeFloat16) { + MS_LOG(ERROR) << "Conv2D only supports Int32/Int64/Float32/Float16 indices Tensor."; + return RET_ERROR; + } + + auto *param = reinterpret_cast(this->op_parameter_); + axis_ = param->axis_; + if (axis_ < 0) { + axis_ += input_ndim; + } + if (axis_ < 0 || axis_ >= input_ndim) { + MS_LOG(ERROR) << "axis is invalid: axis=" << axis_ << "."; + return RET_ERROR; + } else { + return RET_OK; + } +} void GatherOpenCLKernel::SetConstArgs() { - auto param = reinterpret_cast(this->op_parameter_); - param->axis_ = (param->axis_ + in_tensors_[0]->shape().size()) % in_tensors_[0]->shape().size(); - auto input_shape = in_tensors_[0]->shape(); - auto output_shape = out_tensors_[0]->shape(); - int indices_num = in_tensors_[1]->ElementsNum(); - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); - size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); - cl_int4 src_size = {in_tensors_[0]->Width(), in_tensors_[0]->Height(), (cl_int)CI4, 
in_tensors_[0]->Batch()}; - cl_int4 dst_size = {(cl_int)out_tensors_[0]->Width(), (cl_int)out_tensors_[0]->Height(), (cl_int)CO4, - (cl_int)out_tensors_[0]->Batch()}; + auto input = GpuTensorInfo(in_tensors_.front()); + auto output = GpuTensorInfo(out_tensors_.front()); + int indices_num = in_tensors_.at(1)->ElementsNum(); + cl_int4 src_size = {static_cast(input.W), static_cast(input.H), static_cast(input.Slice), + static_cast(input.N)}; + cl_int4 dst_size = {static_cast(output.W), static_cast(output.H), static_cast(output.Slice), + static_cast(output.N)}; int arg_cnt = 3; ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, param->axis_); + ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_); } void GatherOpenCLKernel::SetGlobalLocal() { - size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); + auto output = GpuTensorInfo(out_tensors_.front()); std::vector local = {1, 1, 1}; - std::vector global = {(size_t)out_tensors_[0]->Width(), - (size_t)out_tensors_[0]->Batch() * (size_t)out_tensors_[0]->Height(), CO4}; + std::vector global = {output.W, output.N * output.H, output.Slice}; OpenCLKernel::AlignGlobalLocal(global, local); } int GatherOpenCLKernel::Prepare() { - std::string kernel_name = "gather_NHWC4"; + std::string kernel_name = "gather"; #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; - std::string source = gather_source; std::string program_name = "gather"; - ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->LoadSource(program_name, gather_source); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif - InitWeights(); + int ret = InitWeights(); + if (ret != RET_OK) { + return ret; + } SetGlobalLocal(); 
SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; @@ -79,58 +120,42 @@ int GatherOpenCLKernel::Prepare() { int GatherOpenCLKernel::InitWeights() { auto indices_tensor = in_tensors_.at(1); - int indices_num = indices_tensor->ElementsNum(); - bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32; + auto indices_num = indices_tensor->ElementsNum(); auto allocator = ocl_runtime_->GetAllocator(); - if (!isIndicesInt32) { - indices_data_ = reinterpret_cast(allocator->Malloc(sizeof(int32_t) * indices_num)); - if (indices_data_ == nullptr) { - MS_LOG(ERROR) << "Memory allocation failed"; - return RET_ERROR; - } + indices_data_ = reinterpret_cast(allocator->Malloc(sizeof(int32_t) * indices_num)); + if (indices_data_ == nullptr) { + MS_LOG(ERROR) << "Memory allocation failed"; + return RET_ERROR; } - return RET_OK; -} -int GatherOpenCLKernel::UpdateWeights() { - auto indices_tensor = in_tensors_.at(1); - int indices_num = indices_tensor->ElementsNum(); - bool isIndicesInt32 = indices_tensor->data_type() == kNumberTypeInt32; - if (!isIndicesInt32) { - if (indices_tensor->data_type() == kNumberTypeInt64) { - for (int i = 0; i < indices_num; i++) { - indices_data_[i] = reinterpret_cast(indices_tensor->data_c())[i]; - } - } else if (indices_tensor->data_type() == kNumberTypeFloat32) { - for (int i = 0; i < indices_num; i++) { - indices_data_[i] = reinterpret_cast(indices_tensor->data_c())[i]; - } - } else if (indices_tensor->data_type() == kNumberTypeFloat16) { - for (int i = 0; i < indices_num; i++) { - indices_data_[i] = reinterpret_cast(indices_tensor->data_c())[i]; - } - } else { - MS_LOG(ERROR) << "Unsupported data type: " << indices_tensor->data_type(); - return RET_ERROR; + auto data_type = indices_tensor->data_type(); + auto data = indices_tensor->data_c(); + if (data_type == kNumberTypeInt32) { + for (int i = 0; i < indices_num; i++) { + indices_data_[i] = reinterpret_cast(data)[i]; + } + } else if (data_type == kNumberTypeInt64) { + for 
(int i = 0; i < indices_num; i++) { + indices_data_[i] = reinterpret_cast(data)[i]; + } + } else if (data_type == kNumberTypeFloat32) { + for (int i = 0; i < indices_num; i++) { + indices_data_[i] = reinterpret_cast(data)[i]; + } + } else if (data_type == kNumberTypeFloat16) { + for (int i = 0; i < indices_num; i++) { + indices_data_[i] = reinterpret_cast(data)[i]; } - } else { - indices_data_ = reinterpret_cast(indices_tensor->data_c()); } return RET_OK; } int GatherOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - - if (UpdateWeights() != RET_OK) { - return RET_ERROR; - } - - ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); - + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h index b1a3f5a04f..4fb5c0151d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h @@ -45,6 +45,7 @@ class GatherOpenCLKernel : public OpenCLKernel { private: cl::Kernel kernel_; int32_t *indices_data_{nullptr}; + int axis_ = {0}; }; } // namespace mindspore::kernel #endif diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/hswish.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/hswish.cc deleted file mode 100644 index 099548aca1..0000000000 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/hswish.cc +++ /dev/null @@ -1,128 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., 
Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "src/runtime/kernel/opencl/kernel/hswish.h" -#include -#include -#include -#include -#include "src/kernel_registry.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "src/runtime/kernel/opencl/cl/hswish.cl.inc" - -using mindspore::kernel::KERNEL_ARCH::kGPU; -using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_OK; -using mindspore::schema::PrimitiveType_Activation; - -namespace mindspore::kernel { - -int HswishOpenCLKernel::Init() { - if (out_tensors_[0]->shape().size() > 4) { - MS_LOG(ERROR) << " only support dim <= 4"; - return RET_ERROR; - } - - std::string kernel_name = "hswish"; - std::set build_options; - std::string source = hswish_source; - std::string program_name = "hswish"; - ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); - MS_LOG(DEBUG) << kernel_name << " Init Done!"; - return RET_OK; -} - -void HswishGetWorkGroup(const std::vector &global, std::vector *local, int max_size) { - const int max_divider = 8; - const int max_x = 2, max_y = 8; - int x = std::min(GetMaxDivisorStrategy1(global[0], max_divider), max_x); - int yz = max_size / x; - int y = std::min(std::min(GetMaxDivisorStrategy1(global[1], max_divider), yz), max_y); - int z = std::min(yz / y, static_cast(UP_DIV(global[2], 2))); - - local->clear(); - local->push_back(x); - 
local->push_back(y); - local->push_back(z); -} - -int HswishOpenCLKernel::InferShapeTo4D() { - if (in_tensors_[0]->shape().size() <= 4) { - if (in_tensors_[0]->shape().size() == 1) { - N_ = in_tensors_[0]->shape()[0]; - } else if (in_tensors_[0]->shape().size() == 2) { - N_ = in_tensors_[0]->shape()[0]; - C_ = in_tensors_[0]->shape()[1]; - } else if (in_tensors_[0]->shape().size() == 3) { - N_ = in_tensors_[0]->shape()[0]; - W_ = in_tensors_[0]->shape()[1]; - C_ = in_tensors_[0]->shape()[2]; - } else { - N_ = in_tensors_[0]->shape()[0]; - H_ = in_tensors_[0]->shape()[1]; - W_ = in_tensors_[0]->shape()[2]; - C_ = in_tensors_[0]->shape()[3]; - } - } else { - MS_LOG(ERROR) << "Unsupported inputdim: " << in_tensors_[0]->shape().size(); - return RET_ERROR; - } - return RET_OK; -} - -int HswishOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running! "; - auto output_shape = out_tensors_[0]->shape(); - InferShapeTo4D(); - cl_int4 output_shape_ = {static_cast(N_), static_cast(H_), static_cast(W_), - static_cast(UP_DIV(C_, C4NUM))}; - const std::vector &max_global = ocl_runtime_->GetWorkItemSize(); - std::vector local = {1, 1, 1}; - uint32_t OH = N_ * H_; - uint32_t OW = W_; - uint32_t OC = UP_DIV(C_, C4NUM); - std::vector global = {OH, OW, OC}; - HswishGetWorkGroup(global, &local, max_global[0]); - int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); - ocl_runtime_->RunKernel(kernel_, global, local, nullptr); - return RET_OK; -} - -kernel::LiteKernel *HswishOpenCLKernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const lite::InnerContext *ctx, const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { - auto *kernel = new (std::nothrow) HswishOpenCLKernel(opParameter, inputs, outputs); - if (kernel == nullptr) { - 
MS_LOG(ERROR) << " new HswishOpenCLKernel failed "; - free(opParameter); - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << " Init kernel failed, name: hswish "; - delete kernel; - return nullptr; - } - return kernel; -} - -} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 3100b45be0..bdee117db1 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -59,11 +59,10 @@ int MatMulOpenCLKernel::Prepare() { #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string source = matmul_source; std::string program_name = "MatMul"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif InitWeights(); SetConstArgs(); @@ -159,7 +158,7 @@ int MatMulOpenCLKernel::Run() { int arg_count = 0; ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc index 236f3981c3..7dea6d5203 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc @@ -34,8 +34,8 @@ int OneHotOpenCLKernel::CheckSpecs() { return RET_OK; } int OneHotOpenCLKernel::Prepare() { std::string kernel_name = "OneHot"; auto param = reinterpret_cast(op_parameter_); - in_shape_ = Image2DInfo(in_tensors_[0]); - 
out_shape_ = Image2DInfo(out_tensors_[0]); + in_shape_ = GpuTensorInfo(in_tensors_[0]); + out_shape_ = GpuTensorInfo(out_tensors_[0]); axis_ = out_shape_.AlignAxis(param->axis_); if (in_tensors_[0]->shape().size() == 1 && axis_ == 0) { kernel_name += "2DAxis0"; @@ -82,7 +82,7 @@ void OneHotOpenCLKernel::SetConstArgs() { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(out_shape_.C)); + ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast(out_shape_.C)); } void OneHotOpenCLKernel::SetGlobalLocal() { global_range_ = {out_shape_.Slice, out_shape_.W, out_shape_.H * out_shape_.N}; @@ -90,10 +90,9 @@ void OneHotOpenCLKernel::SetGlobalLocal() { int OneHotOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h index c24e21ba96..865a713a9d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h @@ -44,8 +44,8 @@ class OneHotOpenCLKernel : public OpenCLKernel { float on_value_{1.0f}; float off_value_{0.0f}; int axis_{0}; - Image2DInfo in_shape_ = Image2DInfo(nullptr); - Image2DInfo out_shape_ = Image2DInfo(nullptr); + GpuTensorInfo in_shape_ = GpuTensorInfo(nullptr); + GpuTensorInfo out_shape_ = 
GpuTensorInfo(nullptr); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc index 77d5dfd966..fb9eee7987 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc @@ -33,91 +33,81 @@ using mindspore::schema::PrimitiveType_Pad; namespace mindspore::kernel { -int PadOpenCLKernel::Init() { +int PadOpenCLKernel::CheckSpecs() { auto param = reinterpret_cast(op_parameter_); - std::set build_options; - - if (in_tensors_.empty()) { - MS_LOG(ERROR) << "PadOpenCLKernel in_tensors is empty"; + if (in_tensors_.size() != 1) { + MS_LOG(ERROR) << "Pad only support 1 input Tensor."; + return RET_ERROR; + } + if (out_tensors_.size() != 1) { + MS_LOG(ERROR) << "Pad only support 1 output Tensor."; + return RET_ERROR; + } + auto in_ndim = in_tensors_.front()->shape().size(); + if (in_ndim < 1 || in_ndim > 4) { + MS_LOG(ERROR) << "Pad only supports 1D-4D input Tensor but get " << in_ndim << "D."; return RET_ERROR; } - if (out_tensors_.empty()) { - MS_LOG(ERROR) << "PadOpenCLKernel out_tensors is empty"; + auto out_ndim = in_tensors_.front()->shape().size(); + if (out_ndim < 1 || out_ndim > 4) { + MS_LOG(ERROR) << "Pad only supports 1D-4D output Tensor but get " << out_ndim << "D."; return RET_ERROR; } - if (param->paddings_[0] || param->paddings_[1] || param->paddings_[6] || param->paddings_[7]) { - MS_LOG(ERROR) << "PadOpenCLKernel not support pad at Batch/Channel axis"; + if (in_ndim != out_ndim) { + MS_LOG(ERROR) << "Pad: input ndim != output ndim."; return RET_ERROR; } if (param->pad_mode_ != PaddingMode_CONSTANT) { - MS_LOG(ERROR) << "PadOpenCLKernel only support CONSTANT MODE"; + MS_LOG(ERROR) << "Pad only support CONSTANT MODE."; return RET_ERROR; } + return RET_OK; +} - auto input_tensor = in_tensors_[0]; - auto output_tensor = out_tensors_[0]; - - CI_ = input_tensor->Channel(); - IH_ = 
input_tensor->Height(); - IW_ = input_tensor->Width(); - CO_ = output_tensor->Channel(); - OH_ = output_tensor->Height(); - OW_ = output_tensor->Width(); - CI_SLICES_ = UP_DIV(CI_, C4NUM); - CO_SLICES_ = UP_DIV(CO_, C4NUM); - +int PadOpenCLKernel::Prepare() { const std::string source = pad_source; const std::string program_name = "Pad"; - const std::string kernel_name = "Pad_NHWC4"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); - - MS_LOG(DEBUG) << "Pad Init Done!"; + ocl_runtime_->BuildKernel(kernel_, program_name, "Pad"); + SetConstArgs(); return RET_OK; } -int PadOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; - - auto param = reinterpret_cast(op_parameter_); - cl_int4 input_shape = {1, IH_, IW_, CI_SLICES_}; - cl_int4 output_shape = {1, OH_, OW_, CO_SLICES_}; - cl_int2 pad_top_left = {param->paddings_[2], param->paddings_[4]}; +void PadOpenCLKernel::SetConstArgs() { + auto input = GpuTensorInfo(in_tensors_.front()); + auto output = GpuTensorInfo(out_tensors_.front()); + cl_int4 input_shape = {static_cast(input.N), static_cast(input.H), static_cast(input.W), + static_cast(input.C)}; + cl_int4 output_shape = {static_cast(output.N), static_cast(output.H), static_cast(output.W), + static_cast(output.C)}; + cl_int2 io_slices = {static_cast(input.Slice), static_cast(output.Slice)}; + + int ndim = in_tensors_.front()->shape().size(); + std::vector pad_before_ori; + pad_before_ori.reserve(ndim); + for (size_t i = 0; i < ndim; i++) { + pad_before_ori.push_back(param_->paddings_[MAX_PAD_SIZE - 2 * ndim + 2 * i]); + } + cl_int4 pad_before; + Broadcast2GpuShape(pad_before.s, pad_before_ori.data(), ndim, 0); - int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); + int arg_cn = 2; 
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_top_left); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, static_cast(param->constant_value_)); - - std::vector global = {static_cast(OH_), static_cast(OW_), static_cast(CO_SLICES_)}; - std::vector local = {8, 4, 1}; - ocl_runtime_->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before); + ocl_runtime_->SetKernelArg(kernel_, arg_cn, static_cast(param_->constant_value_)); - return RET_OK; + AlignGlobalLocal({output.N * output.H, output.W, output.Slice}, {8, 4, 1}); } -kernel::LiteKernel *OpenCLPadKernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const lite::InnerContext *ctx, const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { - auto *kernel = new (std::nothrow) PadOpenCLKernel(reinterpret_cast(opParameter), inputs, outputs); - if (kernel == nullptr) { - MS_LOG(ERROR) << "Create OpenCL Pad kernel failed!"; - free(opParameter); - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init kernel failed, name: Pad"; - delete kernel; - return nullptr; - } - return kernel; +int PadOpenCLKernel::Run() { + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); + return RET_OK; } -REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Pad, OpenCLPadKernelCreator) -REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Pad, OpenCLPadKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Pad, OpenCLKernelCreator) +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Pad, OpenCLKernelCreator) } // namespace mindspore::kernel diff 
--git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h index e274689d3b..604d08640f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h @@ -30,22 +30,19 @@ class PadOpenCLKernel : public OpenCLKernel { public: PadOpenCLKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs) - : OpenCLKernel(parameter, inputs, outputs) {} + : OpenCLKernel(parameter, inputs, outputs), param_(reinterpret_cast(op_parameter_)) {} ~PadOpenCLKernel() override = default; - int Init() override; + int CheckSpecs() override; + + int Prepare() override; + void SetConstArgs() override; + int Run() override; private: cl::Kernel kernel_; - int CI_{}; - int IH_{}; - int IW_{}; - int CO_{}; - int OH_{}; - int OW_{}; - int CI_SLICES_{}; - int CO_SLICES_{}; + PadParameter *param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 1075c68bfb..52d30465c9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -70,11 +70,10 @@ int PoolingOpenCLKernel::Prepare() { #else kernel_name += "_NHWC4"; kernel_name += "_IMG"; - std::set build_options; std::string source = pooling2d_source; std::string program_name = "Pooling2d"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetConstArgs(); SetGlobalLocal(); @@ -112,7 +111,7 @@ int PoolingOpenCLKernel::Run() { int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, 
nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc index 7e0fe62409..80eb852b11 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc @@ -35,7 +35,6 @@ int PowerOpenCLKernel::Init() { use_fp16_enable_ = ocl_runtime_->GetFp16Enable(); auto param = reinterpret_cast(this->op_parameter_); std::string kernel_name = "power"; - std::set build_options; std::string source = power_source; std::string program_name = "power"; broadcast_ = param->broadcast_; @@ -55,7 +54,7 @@ int PowerOpenCLKernel::Init() { scale_ = param->scale_; shift_ = param->shift_; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -133,7 +132,7 @@ int PowerOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter); } - ocl_runtime_->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->RunKernel(kernel_, global, local); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index 58b5a1a674..2864267312 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -103,12 +103,11 @@ int PReluOpenCLKernel::Init() { } enable_fp16_ = ocl_runtime_->GetFp16Enable(); - std::set build_options; std::string source = prelu_source; std::string program_name = "PRelu"; std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? 
"scalar" : "vector"); ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); InitWeights(); MS_LOG(DEBUG) << program_name << " init Done!"; @@ -133,7 +132,7 @@ int PReluOpenCLKernel::Run() { std::vector local = {4, 4, 1}; std::vector global = {static_cast(H_), static_cast(W_), static_cast(CO_SLICES_)}; - auto ret = ocl_runtime_->RunKernel(kernel_, global, local, nullptr); + auto ret = ocl_runtime_->RunKernel(kernel_, global, local); if (ret != mindspore::lite::RET_OK) { MS_LOG(ERROR) << "Run kernel " << op_parameter_->name_ << " error."; return mindspore::lite::RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc index 174bab76f8..a3861c77a2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc @@ -96,7 +96,7 @@ int ReduceOpenCLKernel::CheckSpecs() { } int ReduceOpenCLKernel::Prepare() { - outShape = Image2DInfo(out_tensors_[0]); + outShape = GpuTensorInfo(out_tensors_[0]); auto reduce_param = reinterpret_cast(op_parameter_); if (reduce_param == nullptr) { return RET_NULL_PTR; @@ -120,11 +120,10 @@ int ReduceOpenCLKernel::Prepare() { #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string source = reduce_source; std::string program_name = "Reduce"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetConstArgs(); SetGlobalLocal(); @@ -165,7 +164,7 @@ int ReduceOpenCLKernel::Run() { int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - 
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h index 8d38682365..f316b93b65 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h @@ -41,7 +41,7 @@ class ReduceOpenCLKernel : public OpenCLKernel { cl_float4 GenC4Mask(); static std::string GetReduceTypeStr(int type); cl::Kernel kernel_; - Image2DInfo outShape = Image2DInfo(nullptr); + GpuTensorInfo outShape = GpuTensorInfo(nullptr); bool use_local_{false}; bool wc_reduce_{false}; static const size_t LOCAL_CACHE_THREAD{16}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 5ef6b4162e..8d436e9820 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -43,8 +43,8 @@ int ReshapeOpenCLKernel::CheckSpecs() { } void ReshapeOpenCLKernel::SetConstArgs() { - auto in = Image2DInfo(in_tensors_.front()); - auto out = Image2DInfo(out_tensors_.front()); + auto in = GpuTensorInfo(in_tensors_.front()); + auto out = GpuTensorInfo(out_tensors_.front()); cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)}; cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)}; @@ -54,7 +54,7 @@ void ReshapeOpenCLKernel::SetConstArgs() { } void ReshapeOpenCLKernel::SetGlobalLocal() { - auto out = Image2DInfo(out_tensors_.front()); + auto out = GpuTensorInfo(out_tensors_.front()); std::vector local = {}; std::vector global{out.width, out.height}; OpenCLKernel::AlignGlobalLocal(global, local); @@ -65,11 +65,10 @@ int ReshapeOpenCLKernel::Prepare() { #ifdef PROGRAM_WITH_IL kernel_ = 
ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string source = reshape_source; std::string program_name = "reshape"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetGlobalLocal(); @@ -82,7 +81,7 @@ int ReshapeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc index b50738ce1d..252897696f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc @@ -63,11 +63,10 @@ int ResizeOpenCLKernel::Prepare() { #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string source = resize_source; std::string program_name = "Resize"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetConstArgs(); SetGlobalLocal(); @@ -102,7 +101,7 @@ void ResizeOpenCLKernel::SetConstArgs() { void ResizeOpenCLKernel::SetGlobalLocal() { local_range_ = {}; - auto out_shape = Image2DInfo(out_tensors_[0]); + auto out_shape = GpuTensorInfo(out_tensors_[0]); global_range_ = {out_shape.Slice, out_shape.W, out_shape.H}; } @@ -111,7 +110,7 @@ int ResizeOpenCLKernel::Run() { int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, 
arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index 15e1d32789..a5c6e0b340 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -49,7 +49,7 @@ ScaleOpenCLKernel::~ScaleOpenCLKernel() { void ScaleOpenCLKernel::Image2dGetWorkGroupSize() { local_size_ = {16, 16}; - auto image2d_info = Image2DInfo(out_tensors_[0]); + auto image2d_info = GpuTensorInfo(out_tensors_[0]); global_size_ = {image2d_info.width, image2d_info.height}; } @@ -69,7 +69,7 @@ int ScaleOpenCLKernel::InitWeights() { offset_ptr_ = allocator->Malloc(in_tensors_[2]->ElementsNum(), img_size, in_tensors_[2]->data_c()); return RET_OK; } - auto image2d_info = Image2DInfo(in_tensors_[1]); + auto image2d_info = GpuTensorInfo(in_tensors_[1]); int pack_weight_size = image2d_info.ElementsC4Num; int plane = image2d_info.H * image2d_info.W; int channel = image2d_info.C; @@ -185,10 +185,9 @@ int ScaleOpenCLKernel::Init() { kernel_name += "_BUF"; } std::string program_name = "Scale"; - std::set build_options; std::string source = scale_source; ocl_runtime_->LoadSource(program_name, source); - error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + error_code = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif if (error_code != RET_OK) { return error_code; @@ -244,7 +243,7 @@ int ScaleOpenCLKernel::Run() { } } ocl_runtime_->SetKernelArg(kernel_, arg_idx++, act_type); - ocl_runtime_->RunKernel(kernel_, global_size_, local_size_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_size_, local_size_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index ada387fcc1..ae7929ca4a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -67,7 +67,7 @@ int SoftmaxOpenCLKernel::CheckSpecs() { int SoftmaxOpenCLKernel::Prepare() { std::string kernel_name = "SoftMax"; - out_shape = Image2DInfo(out_tensors_[0]); + out_shape = GpuTensorInfo(out_tensors_[0]); std::string source = softmax_source; if (out_shape.H == 1 && out_shape.W == 1 && axis_ == 3) { // support 4d tensor @@ -81,10 +81,9 @@ int SoftmaxOpenCLKernel::Prepare() { #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string program_name = "SoftMax"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetConstArgs(); SetGlobalLocal(); @@ -135,7 +134,7 @@ int SoftmaxOpenCLKernel::Run() { ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); // run opengl kernel - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h index a68cf41105..9ba280b6be 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h @@ -51,7 +51,7 @@ class SoftmaxOpenCLKernel : public OpenCLKernel { std::vector local_size_; std::vector global_size_; int axis_{0}; - Image2DInfo out_shape = Image2DInfo(nullptr); + GpuTensorInfo out_shape = GpuTensorInfo(nullptr); }; } // namespace mindspore::kernel diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc index 9fffb80b43..9776508fc6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc @@ -90,11 +90,10 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string source = space_to_batch_nd_source; std::string program_name = "space_to_batch_nd"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetGlobalLocal(); @@ -106,9 +105,9 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { int SpaceToBatchNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::IMG); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc index 77acb688f1..035755a66c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc @@ -35,8 +35,8 @@ int SpaceToDepthOpenCLKernel::CheckSpecs() { return RET_OK; } int SpaceToDepthOpenCLKernel::Prepare() { std::string kernel_name; - in_shape_ = Image2DInfo(in_tensors_[0]); - out_shape_ = Image2DInfo(out_tensors_[0]); + 
in_shape_ = GpuTensorInfo(in_tensors_[0]); + out_shape_ = GpuTensorInfo(out_tensors_[0]); if (in_shape_.C % C4NUM != 0) { kernel_name = "SpaceToDepth"; } else { @@ -45,11 +45,10 @@ int SpaceToDepthOpenCLKernel::Prepare() { #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; std::string source = space_to_depth_source; std::string program_name = "SpaceToDepth"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetConstArgs(); SetGlobalLocal(); @@ -78,7 +77,7 @@ int SpaceToDepthOpenCLKernel::Run() { int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h index 55b1d48792..b7df317841 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h @@ -39,8 +39,8 @@ class SpaceToDepthOpenCLKernel : public OpenCLKernel { private: cl::Kernel kernel_; - Image2DInfo in_shape_ = Image2DInfo(nullptr); - Image2DInfo out_shape_ = Image2DInfo(nullptr); + GpuTensorInfo in_shape_ = GpuTensorInfo(nullptr); + GpuTensorInfo out_shape_ = GpuTensorInfo(nullptr); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc index b4382e6958..a1d203250c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc +++ 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc @@ -96,7 +96,7 @@ int SparseToDenseOpenCLKernel::CheckSpecs() { << out_tensors_[0]->shape().size(); return RET_ERROR; } - if (out_tensors_[0]->shape().size() > 2 || in_tensors_.size() < 3) { + if (out_tensors_[0]->shape().size() > 3 || in_tensors_.size() < 3) { MS_LOG(ERROR) << " only support dim <= 2 and in_tensors_.size >= 3"; return RET_ERROR; } @@ -121,7 +121,7 @@ int SparseToDenseOpenCLKernel::CheckSpecs() { void SparseToDenseOpenCLKernel::SetConstArgs() { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - Image2DInfo img_info(out_tensors_[0]); + GpuTensorInfo img_info(out_tensors_[0]); size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); stride_w = img_info.RowPitch() / dtype; cl_int2 input_shape = {n_ * h_, w_ * UP_DIV(c_, C4NUM)}; @@ -148,11 +148,10 @@ int SparseToDenseOpenCLKernel::Prepare() { inshapeindex1_dim = in_tensors_[0]->shape()[1]; weight_scalar_ = in_tensors_[2]->IsScalar(); std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? 
"Scalar" : "Vector"); - std::set build_options; std::string source = sparse_to_dense_source; std::string program_name = "SparseToDense"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); if (in_tensors_.size() > 3) { auto input_tensor3 = in_tensors_[3]; @@ -210,7 +209,7 @@ int SparseToDenseOpenCLKernel::Run() { } else { ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_); } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc index 5ce9b34c5a..b5639a34f0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc @@ -73,11 +73,10 @@ int StackOpenCLKernel::Init() { return RET_ERROR; } MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; - std::set build_options; std::string source = stack_source; std::string program_name = "stack"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); return RET_OK; } @@ -184,7 +183,7 @@ int StackOpenCLKernel::Run() { std::vector global = {OH_, OW_, OC_}; StackGetWorkGroup(global, &local, max_global[0]); ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); - ocl_runtime_->RunKernel(kernel_, global, local, nullptr); + ocl_runtime_->RunKernel(kernel_, global, local); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc index 2c0f1e3efb..a39edd1b6d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc +++ 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -33,7 +33,7 @@ using mindspore::schema::PrimitiveType_StridedSlice; namespace mindspore::kernel { -int SliceOpenCLKernel::CheckSpecs() { +int StridedSliceOpenCLKernel::CheckSpecs() { const std::string kernel_name = op_parameter_->type_ == PrimitiveType_Slice ? "Slice" : "StridedSlice"; if (in_tensors_.size() != 1) { MS_LOG(ERROR) << kernel_name + " only supports 1 input Tensor."; @@ -54,25 +54,24 @@ int SliceOpenCLKernel::CheckSpecs() { return RET_ERROR; } if (InitConstArgs() != RET_OK) { - MS_LOG(ERROR) << "call SliceOpenCLKernel::InitConstArgs() failed"; + MS_LOG(ERROR) << "call InitConstArgs() failed"; return RET_ERROR; } return RET_OK; } -int SliceOpenCLKernel::Prepare() { - std::set build_options; +int StridedSliceOpenCLKernel::Prepare() { std::string program_name = "strided_slice"; ocl_runtime_->LoadSource(program_name, strided_slice_source); - ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice", build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice"); SetConstArgs(); SetGlobalLocal(); return RET_OK; } -int SliceOpenCLKernel::InitConstArgs() { - auto input_info = Image2DInfo(in_tensors_.front()); - auto output_info = Image2DInfo(out_tensors_.front()); +int StridedSliceOpenCLKernel::InitConstArgs() { + auto input_info = GpuTensorInfo(in_tensors_.front()); + auto output_info = GpuTensorInfo(out_tensors_.front()); input_shape_ = {static_cast(input_info.N), static_cast(input_info.H), static_cast(input_info.W), static_cast(input_info.C)}; output_shape_ = {static_cast(output_info.N), static_cast(output_info.H), @@ -81,19 +80,19 @@ int SliceOpenCLKernel::InitConstArgs() { if (op_parameter_->type_ == PrimitiveType_Slice) { auto param = reinterpret_cast(op_parameter_); - Broadcast2GpuShape(param->begin_, begin_.s, param->param_length_, 0); - Broadcast2GpuShape(param->size_, size_.s, param->param_length_, -1); + Broadcast2GpuShape(begin_.s, param->begin_, 
param->param_length_, 0); + Broadcast2GpuShape(size_.s, param->size_, param->param_length_, -1); for (int i = 0; i < 4; ++i) { if (begin_.s[i] < 0) { begin_.s[i] += input_shape_.s[i]; } if (begin_.s[i] < 0 || begin_.s[i] >= input_shape_.s[i]) { - MS_LOG(ERROR) << "Slice kernel only supports 0<=begin0 but size[i]=" << size_.s[i]; + MS_LOG(ERROR) << "Slice only supports size=-1 or size>0 but size[i]=" << size_.s[i]; return RET_ERROR; } if (size_.s[i] == -1 || begin_.s[i] + size_.s[i] > input_shape_.s[i]) { @@ -103,9 +102,9 @@ int SliceOpenCLKernel::InitConstArgs() { } else { auto param = reinterpret_cast(op_parameter_); cl_int4 end = input_shape_; - Broadcast2GpuShape(param->begins_, begin_.s, param->num_axes_, 0); - Broadcast2GpuShape(param->strides_, stride_.s, param->num_axes_, 1); - Broadcast2GpuShape(param->ends_, end.s, param->num_axes_); + Broadcast2GpuShape(begin_.s, param->begins_, param->num_axes_, 0); + Broadcast2GpuShape(stride_.s, param->strides_, param->num_axes_, 1); + Broadcast2GpuShape(end.s, param->ends_, param->num_axes_); for (int i = 0; i < 4; ++i) { // begin is negative @@ -143,9 +142,9 @@ int SliceOpenCLKernel::InitConstArgs() { // check size std::vector shape_not_1; std::vector size_not_1; - std::copy_if(out_tensors_.front()->shape().begin(), out_tensors_.front()->shape().end(), shape_not_1.begin(), - [](int x) { return x > 1; }); - std::copy_if(size_.s, size_.s + 4, size_not_1.begin(), [](int x) { return x > 1; }); + auto output_shape = out_tensors_.front()->shape(); + std::copy_if(output_shape.begin(), output_shape.end(), std::back_inserter(shape_not_1), [](int x) { return x > 1; }); + std::copy_if(size_.s, size_.s + 4, std::back_inserter(size_not_1), [](int x) { return x > 1; }); if (shape_not_1 != size_not_1) { MS_LOG(ERROR) << "Slice/StridedSlice kernel output shape infer error"; return RET_ERROR; @@ -153,7 +152,7 @@ int SliceOpenCLKernel::InitConstArgs() { return RET_OK; } -void SliceOpenCLKernel::SetConstArgs() { +void 
StridedSliceOpenCLKernel::SetConstArgs() { int arg_cn = 2; ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); @@ -163,8 +162,8 @@ void SliceOpenCLKernel::SetConstArgs() { ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_); } -void SliceOpenCLKernel::SetGlobalLocal() { - auto output_info = Image2DInfo(out_tensors_.front()); +void StridedSliceOpenCLKernel::SetGlobalLocal() { + auto output_info = GpuTensorInfo(out_tensors_.front()); std::vector global = {output_info.N * output_info.H, output_info.W, output_info.Slice}; const int max_divider = 8; @@ -177,16 +176,16 @@ void SliceOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global, local); } -int SliceOpenCLKernel::Run() { +int StridedSliceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } -REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLKernelCreator); -REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLKernelCreator); -REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, OpenCLKernelCreator); -REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, OpenCLKernelCreator); } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h index 
7edbf437c1..58fb7d3d8b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h @@ -23,21 +23,22 @@ namespace mindspore::kernel { -class SliceOpenCLKernel : public OpenCLKernel { +class StridedSliceOpenCLKernel : public OpenCLKernel { public: - SliceOpenCLKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs) + StridedSliceOpenCLKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs) : OpenCLKernel(parameter, inputs, outputs) {} - ~SliceOpenCLKernel() override = default; - - int Prepare() override; - int Run() override; + ~StridedSliceOpenCLKernel() override = default; int CheckSpecs() override; + + int Prepare() override; void SetConstArgs() override; void SetGlobalLocal() override; + int Run() override; + private: int InitConstArgs(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 640a147cd6..3e469cf496 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -33,24 +33,27 @@ using mindspore::schema::PrimitiveType_ToFormat; namespace mindspore::kernel { int ToFormatOpenCLKernel::CheckSpecs() { - if (in_tensors_[0]->data_type() != kNumberTypeFloat32 && in_tensors_[0]->data_type() != kNumberTypeFloat16) { - MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[0]->data_type(); + auto data_type = in_tensors_.front()->data_type(); + if (data_type != kNumberTypeFloat32 && data_type != kNumberTypeFloat16) { + MS_LOG(ERROR) << "Unsupported data type " << data_type; return RET_ERROR; } auto parameter = reinterpret_cast(op_parameter_); out_mem_type_ = parameter->out_mem_type; return RET_OK; } + void ToFormatOpenCLKernel::SetConstArgs() { cl_int4 shape{(cl_int)N_, (cl_int)H_, (cl_int)W_, (cl_int)C_}; cl_int4 gsize{(cl_int)(N_ * H_), 
(cl_int)W_, (cl_int)UP_DIV(C_, C4NUM), 1}; ocl_runtime_->SetKernelArg(kernel_, 2, gsize); ocl_runtime_->SetKernelArg(kernel_, 3, shape); } + void ToFormatOpenCLKernel::SetGlobalLocal() { std::vector global = {N_ * H_, W_, UP_DIV(C_, C4NUM)}; std::vector local = {8, 16, 3}; - size_t max_work_group_size = ocl_runtime_->GetKernelMaxWorkGroupSize(kernel_(), (*ocl_runtime_->Device())()); + size_t max_work_group_size = ocl_runtime_->DeviceMaxWorkGroupSize(); if (max_work_group_size < 384) { local[2] = 1; } @@ -61,9 +64,9 @@ int ToFormatOpenCLKernel::Prepare() { std::map dtype_str{{kNumberTypeFloat32, "float"}, {kNumberTypeFloat16, "half"}}; std::string kernel_name; if (out_mem_type_ == MemType::IMG) { - kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_[0]->data_type()]; + kernel_name = "to_format_NHWC_to_NHWC4_IMG_" + dtype_str[in_tensors_.front()->data_type()]; } else { - kernel_name = "to_format_NHWC4_to_NHWC_BUF_" + dtype_str[out_tensors_[0]->data_type()]; + kernel_name = "to_format_NHWC4_to_NHWC_BUF_" + dtype_str[out_tensors_.front()->data_type()]; } this->set_name(kernel_name); @@ -71,52 +74,30 @@ int ToFormatOpenCLKernel::Prepare() { kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else std::string program_name = "to_format"; - std::set build_options; std::string source = to_format_source; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif - InitNHWC(); + auto output = GpuTensorInfo(out_tensors_.front()); + N_ = output.N; + H_ = output.H; + W_ = output.W; + C_ = output.C; + SetGlobalLocal(); SetConstArgs(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -int ToFormatOpenCLKernel::InitNHWC() { - std::vector out_shape = out_tensors_[0]->shape(); - if (out_shape.size() == 1) { - N_ = out_shape[0]; - H_ = 1; - W_ = 1; - C_ = 1; - } else if (out_shape.size() == 2) { - N_ = 
out_shape[0]; - H_ = 1; - W_ = 1; - C_ = out_shape[1]; - } else if (out_shape.size() == 3) { - N_ = out_shape[0]; - H_ = 1; - W_ = out_shape[1]; - C_ = out_shape[2]; - } else if (out_shape.size() == 4) { - N_ = out_shape[0]; - H_ = out_shape[1]; - W_ = out_shape[2]; - C_ = out_shape[3]; - } - return RET_OK; -} - int ToFormatOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; auto src_mem_type = (out_mem_type_ == MemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; auto dst_mem_type = out_mem_type_; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), src_mem_type); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), dst_mem_type); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h index b18b3c7a80..ee4801ea58 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h @@ -38,8 +38,6 @@ class ToFormatOpenCLKernel : public OpenCLKernel { void SetGlobalLocal() override; private: - int InitNHWC(); - cl::Kernel kernel_; size_t N_{1}; size_t H_{1}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index ce34c0b42a..d6b53ffc95 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -68,11 +68,10 @@ int TransposeOpenCLKernel::Prepare() { #ifdef PROGRAM_WITH_IL kernel_ = ocl_runtime_->GetKernelFromBinary(kernel_name); #else - std::set build_options; 
std::string source = transpose_source; std::string program_name = "transpose"; ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); + ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name); #endif SetConstArgs(); SetGlobalLocal(); @@ -109,7 +108,7 @@ int TransposeOpenCLKernel::Run() { int arg_idx = 0; ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return mindspore::lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 4ebecd360f..beae7c39f9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -35,7 +35,7 @@ struct OpenCLToFormatParameter { }; template -void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num) { +void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num) { auto *N = dst; auto *H = dst + 1; auto *W = dst + 2; @@ -60,37 +60,26 @@ void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num) { } template -void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num, DstT default_value) { +void Broadcast2GpuShape(DstT *dst, const SrcT *src, int src_num, DstT default_value) { for (int i = 0; i < 4; ++i) { dst[i] = default_value; } - Broadcast2GpuShape(src, dst, src_num); + Broadcast2GpuShape(dst, src, src_num); } -struct Image2DInfo { - explicit Image2DInfo(const lite::Tensor *tensor) { +struct GpuTensorInfo { + explicit GpuTensorInfo(const lite::Tensor *tensor) { if (tensor == nullptr) { return; } - auto shape = tensor->shape(); - OriDim = shape.size(); - if (OriDim == 1) { - N = shape[0]; - } else if (OriDim == 2) { - N = shape[0]; - C = 
shape[1]; - } else if (OriDim == 3) { - N = shape[0]; - W = shape[1]; - C = shape[2]; - } else if (OriDim == 4) { - N = shape[0]; - H = shape[1]; - W = shape[2]; - C = shape[3]; - } else if (OriDim >= 5) { - MS_LOG(ERROR) << "GPU doesn't support Tensor with ndim>=" << OriDim; - } + auto shape_ori = tensor->shape(); + NDim = shape_ori.size(); + cl_int4 shape; + Broadcast2GpuShape(shape.s, shape_ori.data(), shape_ori.size(), 1); + N = shape.s[0]; + H = shape.s[1]; + W = shape.s[2]; + C = shape.s[3]; Slice = UP_DIV(C, C4NUM); FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float); @@ -117,14 +106,14 @@ struct Image2DInfo { } int AlignAxis(int oriAxis) const { - if (OriDim == 0) { + if (NDim == 0) { return 0; } - int no_neg_axis = (oriAxis + OriDim) % OriDim; + int no_neg_axis = static_cast((oriAxis + NDim) % NDim); if (no_neg_axis == 0) { return 0; } - return no_neg_axis + 4 - OriDim; + return static_cast(no_neg_axis + 4 - NDim); } size_t N{1}; @@ -140,7 +129,7 @@ struct Image2DInfo { size_t ElementsC4Num{}; size_t OriginSize{}; size_t Image2DSize{}; - size_t OriDim{}; + size_t NDim{}; }; class OpenCLKernel : public LiteKernel { @@ -205,7 +194,7 @@ class OpenCLKernel : public LiteKernel { if (idx >= out_tensors_.size()) { return RET_ERROR; } - auto img_info = Image2DInfo(out_tensors_[idx]); + auto img_info = GpuTensorInfo(out_tensors_[idx]); size_t img_dtype = ocl_runtime_->GetFp16Enable() ? 
CL_HALF_FLOAT : CL_FLOAT; *img_size = {img_info.width, img_info.height, img_dtype}; return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/opencl/utils.cc b/mindspore/lite/src/runtime/kernel/opencl/utils.cc index bef9e33664..fa3e8fc5c5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/utils.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/utils.cc @@ -260,7 +260,7 @@ void PrintTensor(const lite::Tensor *tensor, MemType mem_type, int n, const std: return; } - Image2DInfo img_info(tensor); + GpuTensorInfo img_info(tensor); auto size = mem_type == MemType::BUF ? img_info.OriginSize : img_info.Image2DSize; std::vector data(size); auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); diff --git a/mindspore/lite/src/runtime/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/opencl/opencl_runtime.h index 72a622c14f..e82f09072e 100644 --- a/mindspore/lite/src/runtime/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/opencl/opencl_runtime.h @@ -112,11 +112,11 @@ class OpenCLRuntime { std::vector> GetProgramBinaries(const cl::Program &program); bool LoadSource(const std::string &program_name, const std::string &source); int BuildKernel(cl::Kernel &kernel, const std::string &program_name, const std::string &kernel_name, - const std::set &build_options); + const std::set &build_options = {}); int RunKernel(const cl::Kernel &kernel, const std::vector &global, const std::vector &local, - cl::CommandQueue *command_queue); // !!!To be deleted + cl::CommandQueue *command_queue = nullptr); // !!!To be deleted int RunKernel(const cl::Kernel &kernel, const cl::NDRange &global, const cl::NDRange &local, - cl::CommandQueue *command_queue); + cl::CommandQueue *command_queue = nullptr); bool CopyDeviceMemToHost(void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr, bool sync = false) const; bool CopyHostMemToDevice(const void *dst, const void *src, size_t size, cl::CommandQueue *command_queue = nullptr, diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc index 2f175d091e..bb53b44e43 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/activation_tests.cc @@ -13,110 +13,83 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/fp32/activation_fp32.h" -namespace mindspore { -class TestActivationOpenCL : public mindspore::CommonTest { - public: - TestActivationOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseActivation(void *input_data0, const std::vector &input_shape, void *output_data, - const std::vector &out_shape, bool enable_fp16, int act_type) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - auto param = static_cast(malloc(sizeof(ActivationParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - param->op_parameter_.type_ = schema::PrimitiveType_Activation; - param->type_ = act_type; - auto tensor_x_ptr = - std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), input_shape); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - auto tensor_out_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), out_shape); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto op_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (op_kernel == nullptr) { - MS_LOG(ERROR) << "op_kernel create error."; - return; - } - inputs[0]->MallocData(allocator); +class TestOpenCL_Activation : public CommonTest {}; - std::vector kernels{op_kernel}; +namespace { +// PrimitiveType_Activation: src/ops/populate/activation_populate.cc +OpParameter *CreateParameter(schema::ActivationType act_type) { + auto *param = test::CreateParameter(schema::PrimitiveType_Activation); + param->type_ = act_type; + param->alpha_ = 0.0f; + param->min_val_ = 0.0f; + param->max_val_ = 0.0f; + return reinterpret_cast(param); +} +} // namespace - std::vector inputs_g{tensor_x}; - auto pGraph_ptr = std::make_unique(inputs_g, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data0, tensor_x->ElementsNum() * dtype_size); - pGraph->Run(); - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-5)); +TEST_F(TestOpenCL_Activation, RELU) { + std::vector input_shape = {1, 2, 2, 3}; + std::vector output_shape = input_shape; + float input_data[] = {-1, 1, 2, 3, -1, -2, 3, 
-4, 5, -6, 7, 9}; + float output_data[] = {0, 1, 2, 3, 0, 0, 3, 0, 5, 0, 7, 9}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::ActivationType_RELU); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } +} - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); +TEST_F(TestOpenCL_Activation, RELU6) { + std::vector input_shape = {1, 2, 2, 3}; + std::vector output_shape = input_shape; + float input_data[] = {-1, 1, 2, 3, -1, -2, 3, -4, 5, -6, 7, 9}; + float output_data[] = {0, 1, 2, 3, 0, 0, 3, 0, 5, 0, 6, 6}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::ActivationType_RELU6); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - MS_LOG(INFO) << "TestActivation passed"; } -TEST_F(TestActivationOpenCL, ActivationReLUFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {-1.0f, 1.0f, 2.0f, 3.0f, -1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 9.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 3.0f, 0.0f, 5.0f, 0.0f, 7.0f, 9.0f}; - RunTestCaseActivation(input_data.data(), in_shape0, output_data.data(), out_shape, false, - schema::ActivationType_RELU); +TEST_F(TestOpenCL_Activation, HSIGMOID) { + std::vector input_shape = {2, 10, 1, 4}; + std::vector output_shape = input_shape; + float input_data[] = {2.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5}; + float output_data[] = {0.9166667, 1, 0, 
0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, + 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, + 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::ActivationType_HSIGMOID); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable, + fp16_enable ? 1e-3 : 1e-4); + } } -TEST_F(TestActivationOpenCL, ActivationReLUFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {-1.0f, 1.0f, 2.0f, 3.0f, -1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 9.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 3.0f, 0.0f, 5.0f, 0.0f, 7.0f, 9.0f}; - RunTestCaseActivation(input_data.data(), in_shape0, output_data.data(), out_shape, true, schema::ActivationType_RELU); +TEST_F(TestOpenCL_Activation, HSWISH) { + std::vector input_shape = {2, 10, 1, 4}; + std::vector output_shape = input_shape; + float input_data[] = {2.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5, + 7.5, 6, -7.4, -3.5, 5.9, 6.5, -8, 7.4, 5.9, 6.5, -8, 7.4, 7.5, 6, -7.4, -3.5}; + float output_data[] = {2.29166667, 6, 0, 0, 5.9, 6.5, 0, 7.4, 5.9, 6.5, 0, 7.4, 7.5, 6, 0, 0, + 7.5, 6, 0, 0, 5.9, 6.5, 0, 7.4, 5.9, 6.5, 0, 7.4, 7.5, 6, 0, 0, + 7.5, 6, 0, 0, 5.9, 6.5, 0, 7.4, 5.9, 6.5, 0, 7.4, 7.5, 6, 0, 0, + 7.5, 6, 0, 0, 5.9, 6.5, 0, 7.4, 5.9, 6.5, 0, 7.4, 7.5, 6, 0, 0, + 7.5, 6, 0, 0, 5.9, 6.5, 0, 7.4, 5.9, 6.5, 0, 7.4, 7.5, 6, 0, 0}; + for (auto fp16_enable : {false, true}) { + auto *param = 
CreateParameter(schema::ActivationType_HSWISH); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable, + fp16_enable ? 1e-2 : 1e-4); + } } -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc index b2cf515ed5..070eff9a08 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/argminmax_tests.cc @@ -13,271 +13,176 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h" - -namespace mindspore { -class TestArgMinMaxOpenCL : public mindspore::CommonTest { - public: - TestArgMinMaxOpenCL() {} -}; -template -void test_main_argminmax(void *input_data, void *correct_data, const std::vector &input_shape, - const std::vector &output_shape, ArgMinMaxParameter *param, TypeId data_type, - schema::Format format) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime_wrap = lite::opencl::OpenCLRuntimeWrapper(); - auto ocl_runtime = ocl_runtime_wrap.GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - auto tensor_a = lite::Tensor(TypeId(data_type), input_shape, format); - auto tensor_c = lite::Tensor(TypeId(data_type), output_shape, format); - std::vector inputs{&tensor_a}; - std::vector outputs{&tensor_c}; - size_t input_size = tensor_a.Size(); - - auto *pkernel = - new (std::nothrow) kernel::ArgMinMaxOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if 
(pkernel == nullptr) { - MS_LOG(INFO) << "new SpaceToBatchNDOpenCLKernel failed "; - return; +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/arg_min_max_parameter.h" + +namespace mindspore::lite::opencl::test { + +class TestOpenCL_ArgMinMax : public CommonTest {}; + +namespace { +// PrimitiveType_ArgMin: src/ops/populate/argmin_populate.cc +// PrimitiveType_ArgMax: src/ops/populate/argmax_populate.cc +OpParameter *CreateParameter(schema::PrimitiveType type, int axis, int topk, bool out_value, bool keep_dims = false, + int axis_type = 0) { + auto *param = test::CreateParameter(type); + param->axis_ = axis; + param->topk_ = topk; + param->axis_type_ = axis_type; + param->out_value_ = out_value; + param->keep_dims_ = keep_dims; + return reinterpret_cast(param); +} +} // namespace + +TEST_F(TestOpenCL_ArgMinMax, axis0topk2index) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 0; + int topk = 2; + bool out_value = false; + std::vector input_shape = {3, 2, 2, 2}; + std::vector output_shape = {2, 2, 2, 2}; + float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; + float output_data[] = {0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 2, 2}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - pkernel->Init(); +} - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); +TEST_F(TestOpenCL_ArgMinMax, axis0topk2value) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 0; + int topk = 2; + bool out_value = true; + std::vector input_shape = {3, 2, 2, 2}; + std::vector output_shape = {2, 2, 2, 2}; + float input_data[] = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; + float output_data[] = {100, 25, 
40, 50, 18, 80, 34, 35, 55, 20, 5, 15, 11, 12, 15, 16}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } +} - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{pkernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - delete pkernel; - MS_LOG(INFO) << " new SubGraphOpenCLKernel failed "; - return; +TEST_F(TestOpenCL_ArgMinMax, axis1topk2index) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 1; + int topk = 2; + bool out_value = false; + std::vector input_shape = {2, 3, 2, 3}; + std::vector output_shape = {2, 2, 2, 3}; + float input_data[] = {100, 2, 200, 4, 50, 6, 11, 12, 13, 34, 35, 36, 9, 6, 17, 10, 20, 30, + 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23}; + float output_data[] = {0, 1, 0, 1, 0, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1, 0, 0, 0, 1, 1, 0}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - sub_graph->Init(); +} - MS_LOG(INFO) << " init tensors "; - T *input_ptr = reinterpret_cast(inputs[0]->MutableData()); - memcpy(input_ptr, input_data, input_size); - std::cout << "==================input data================" << std::endl; - for (auto i = 0; i < inputs[0]->ElementsNum(); ++i) { - std::cout << input_ptr[i] << ", "; +TEST_F(TestOpenCL_ArgMinMax, axis1topk2value) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 1; + int topk = 2; + bool out_value = true; + std::vector input_shape = {2, 3, 2, 3}; + std::vector output_shape = {2, 2, 2, 3}; + float input_data[] = {100, 2, 200, 4, 50, 6, 11, 12, 13, 34, 35, 36, 9, 6, 17, 10, 20, 30, + 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 
18, 5, 16, 9, 22, 23}; + float output_data[] = {100, 12, 200, 34, 50, 36, 11, 6, 17, 10, 35, 30, + 18, 80, 90, 40, 22, 120, 10, 20, 30, 10, 11, 60}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - std::cout << std::endl; - - sub_graph->Run(); +} - auto *output_data = reinterpret_cast(outputs[0]->MutableData()); - std::cout << "==================output data================" << std::endl; - for (auto i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << output_data[i] << ", "; +TEST_F(TestOpenCL_ArgMinMax, axis2topk1index) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 2; + int topk = 1; + bool out_value = false; + std::vector input_shape = {2, 3, 3, 3}; + std::vector output_shape = {2, 3, 1, 3}; + float input_data[] = {10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, + 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, + 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12}; + float output_data[] = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - std::cout << std::endl; - std::cout << "==================correct data================" << std::endl; - for (auto i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << static_cast(correct_data)[i] << ", "; - } - std::cout << std::endl; - CommonTest::CompareOutputData(output_data, static_cast(correct_data), outputs[0]->ElementsNum(), 0.0001); - delete sub_graph; } -TEST_F(TestArgMinMaxOpenCL, axis0topk2index) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; - } - std::vector in_data = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, - 
7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; - std::vector except_out = {0, 2, 1, 0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 0, 2, 2}; - param->dims_size_ = 4; - param->axis_ = 0; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = false; - std::vector in_shape = {3, 2, 2, 2}; - std::vector out_shape = {2, 2, 2, 2}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); -} -TEST_F(TestArgMinMaxOpenCL, axis0topk2value) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; +TEST_F(TestOpenCL_ArgMinMax, axis2topk2value) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 2; + int topk = 2; + bool out_value = true; + std::vector input_shape = {2, 2, 3, 5}; + std::vector output_shape = {1, 2, 2, 5}; + float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; + float output_data[] = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - std::vector in_data = {100, 2, 4, 50, 11, 12, 34, 35, 10, 20, 40, 5, - 7, 80, 10, 11, 55, 25, 5, 15, 18, 8, 15, 16}; - std::vector except_out = {100, 25, 40, 50, 18, 80, 34, 35, 55, 20, 5, 15, 11, 12, 15, 16}; - param->dims_size_ = 4; - param->axis_ = 0; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = true; - std::vector in_shape = {3, 2, 2, 2}; - std::vector out_shape = {2, 2, 2, 2}; - - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - 
test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); } -TEST_F(TestArgMinMaxOpenCL, axis1topk2index) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; - } - std::vector in_data = {100, 2, 200, 4, 50, 6, 11, 12, 13, 34, 35, 36, 9, 6, 17, 10, 20, 30, - 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23}; - std::vector except_out = {0, 1, 0, 1, 0, 1, 1, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1, 0, 0, 0, 1, 1, 0}; - param->dims_size_ = 4; - param->axis_ = 1; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = false; - std::vector in_shape = {2, 3, 2, 3}; - std::vector out_shape = {2, 2, 2, 3}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); -} -TEST_F(TestArgMinMaxOpenCL, axis1topk2value) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; +TEST_F(TestOpenCL_ArgMinMax, axis2topk2index) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 2; + int topk = 2; + bool out_value = false; + std::vector input_shape = {2, 2, 3, 5}; + std::vector output_shape = {2, 2, 2, 5}; + float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; + float output_data[] = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, + 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - std::vector in_data = {100, 2, 200, 4, 50, 6, 11, 12, 13, 34, 35, 36, 9, 6, 
17, 10, 20, 30, - 10, 20, 30, 40, 5, 60, 7, 80, 90, 10, 11, 120, 18, 5, 16, 9, 22, 23}; - std::vector except_out = {100, 12, 200, 34, 50, 36, 11, 6, 17, 10, 35, 30, - 18, 80, 90, 40, 22, 120, 10, 20, 30, 10, 11, 60}; - param->dims_size_ = 4; - param->axis_ = 1; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = true; - std::vector in_shape = {2, 3, 2, 3}; - std::vector out_shape = {2, 2, 2, 3}; - - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); } -TEST_F(TestArgMinMaxOpenCL, axis2topk1index) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; - } - param->dims_size_ = 4; - param->axis_ = 2; - param->topk_ = 1; - param->get_max_ = true; - param->out_value_ = false; - std::vector in_data = {10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, - 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12, - 10, 20, 30, 11, 15, 10, 5, 10, 12, 10, 20, 30, 11, 15, 10, 5, 10, 12}; - std::vector except_out = {1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0}; - std::vector in_shape = {2, 3, 3, 3}; - std::vector out_shape = {2, 3, 1, 3}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); -} -TEST_F(TestArgMinMaxOpenCL, axis2topk2value) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; +TEST_F(TestOpenCL_ArgMinMax, axis3topk2index) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 3; + int topk = 2; + bool out_value = false; + std::vector input_shape = {2, 2, 3, 5}; + std::vector output_shape = {2, 2, 3, 2}; + float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, 
30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; + float output_data[] = {4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - std::vector in_data = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, - 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, - 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; - std::vector except_out = {30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50, - 30, 45, 30, 50, 90, 20, 20, 25, 40, 50, 30, 45, 30, 50, 90, 20, 20, 25, 40, 50}; - param->dims_size_ = 4; - param->axis_ = 2; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = true; - std::vector in_shape = {2, 2, 3, 5}; - std::vector out_shape = {1, 2, 2, 5}; - - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); } -TEST_F(TestArgMinMaxOpenCL, axis2topk2index) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; - } - std::vector in_data = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, - 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, - 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; - std::vector except_out = {2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, - 2, 2, 0, 2, 0, 1, 0, 2, 0, 1, 2, 2, 0, 2, 0, 1, 0, 2, 0, 1}; - param->dims_size_ = 4; - param->axis_ = 2; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = false; - std::vector in_shape = 
{2, 2, 3, 5}; - std::vector out_shape = {2, 2, 2, 5}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); -} -TEST_F(TestArgMinMaxOpenCL, axis3topk2index) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; +TEST_F(TestOpenCL_ArgMinMax, axis3topk2value) { + schema::PrimitiveType type = schema::PrimitiveType_ArgMax; + int axis = 3; + int topk = 2; + bool out_value = true; + std::vector input_shape = {2, 2, 3, 5}; + std::vector output_shape = {2, 2, 3, 2}; + float input_data[] = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, + 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, + 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; + float output_data[] = {90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45, + 90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(type, axis, topk, out_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - std::vector in_data = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, - 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, - 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; - std::vector except_out = {4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1, 4, 3, 4, 0, 3, 1}; - param->dims_size_ = 4; - param->axis_ = 3; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = false; - std::vector in_shape = {2, 2, 3, 5}; - std::vector out_shape = {2, 2, 3, 2}; - - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); } 
-TEST_F(TestArgMinMaxOpenCL, axis3topk2value) { - ArgMinMaxParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; - } - std::vector in_data = {10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, - 20, 11, 15, 1, 50, 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, - 30, 45, 25, 50, 30, 10, 20, 30, 40, 90, 20, 11, 15, 1, 50, 30, 45, 25, 50, 30}; - std::vector except_out = {90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45, - 90, 40, 50, 20, 50, 45, 90, 40, 50, 20, 50, 45}; - param->dims_size_ = 4; - param->axis_ = 3; - param->topk_ = 2; - param->get_max_ = true; - param->out_value_ = true; - std::vector in_shape = {2, 2, 3, 5}; - std::vector out_shape = {2, 2, 3, 2}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_argminmax(in_data.data(), except_out.data(), in_shape, out_shape, param, data_type, format); -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc index 2191fdfc39..99aaf1f626 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_self_tests.cc @@ -13,307 +13,42 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h" - -namespace mindspore { -class TestArithmeticSelfOpenCLfp16 : public mindspore::CommonTest { - public: - TestArithmeticSelfOpenCLfp16() {} -}; - -class TestArithmeticSelfOpenCLCI : public mindspore::CommonTest { - public: - TestArithmeticSelfOpenCLCI() {} -}; - -template -void CompareOutputData1(T *input_data1, T *output_data, T *correct_data, int size, float err_bound) { - for (size_t i = 0; i < size; i++) { - T abs = fabs(output_data[i] - correct_data[i]); - ASSERT_LE(abs, err_bound); - } -} - -TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfOpenCLFp16) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - // get the input from .bin - size_t input1_size, output_size; - std::string input1Ppath = "./test_data/in_arithmetic_selffp16.bin"; - std::string correctOutputPath = "./test_data/out_arithmetic_selffp16.bin"; - auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); - auto correctOutput = - reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - - MS_LOG(INFO) << " init tensors "; - - std::vector shape = {1, 2, 2, 144}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; - auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - if (input_tensor == nullptr || output_tensor 
== nullptr) { - MS_LOG(INFO) << " new input_tensor or output_tensor failed "; - return; - } - std::vector inputs{input_tensor}; - std::vector outputs{output_tensor}; - - MS_LOG(INFO) << " initialize param "; - auto param = reinterpret_cast(malloc(sizeof(ArithmeticSelfParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ConcatParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->op_parameter_.type_ = schema::PrimitiveType_Sin; - auto *arithmeticself_kernel = - new (std::nothrow) kernel::ArithmeticSelfOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (arithmeticself_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::ArithmeticSelfOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - arithmeticself_kernel->Init(); - // to do allocate memory for inputs and outputs - for (auto &input_tensor_ : inputs) { - input_tensor_->MallocData(allocator); - } - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{arithmeticself_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete arithmeticself_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, input1_size); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); 
- delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/arithmetic_self_parameter.h" + +namespace mindspore::lite::opencl::test { + +class TestOpenCL_ArithmeticSelf : public CommonTest {}; + +namespace { +// PrimitiveType_Abs +// PrimitiveType_Cos +// PrimitiveType_Sin +// PrimitiveType_Log +// PrimitiveType_Neg +// PrimitiveType_NegGrad +// PrimitiveType_LogGrad +// PrimitiveType_Sqrt +// PrimitiveType_Square +// PrimitiveType_Rsqrt +// PrimitiveType_LogicalNot +// PrimitiveType_Floor +// PrimitiveType_Ceil +// PrimitiveType_Round: src/ops/populate/arithmetic_self_populate.cc +OpParameter *CreateParameter(schema::PrimitiveType type) { + auto *param = test::CreateParameter(type); + return reinterpret_cast(param); } +} // namespace -TEST_F(TestArithmeticSelfOpenCLCI, ArithmeticSelfRound) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - float input_data1[] = {0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f, - 0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f}; - float correctOutput[] = {1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, - 1.0f, 0.0f, 1.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f}; - - MS_LOG(INFO) << " init tensors "; +TEST_F(TestOpenCL_ArithmeticSelf, Round) { std::vector shape = {1, 1, 4, 4}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - if (input_tensor == nullptr || output_tensor == nullptr) { - MS_LOG(INFO) << " new input_tensor or output_tensor failed "; - return; - } - std::vector inputs{input_tensor}; - std::vector 
outputs{output_tensor}; - - MS_LOG(INFO) << " initialize param "; - auto param = reinterpret_cast(malloc(sizeof(ArithmeticSelfParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ConcatParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->op_parameter_.type_ = schema::PrimitiveType_Round; - auto *arithmeticself_kernel = - new (std::nothrow) kernel::ArithmeticSelfOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (arithmeticself_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::ArithmeticSelfOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - arithmeticself_kernel->Init(); - // to do allocate memory for inputs and outputs - for (auto &input_tensor_ : inputs) { - input_tensor_->MallocData(allocator); - } - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{arithmeticself_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete arithmeticself_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; -} - 
-TEST_F(TestArithmeticSelfOpenCLfp16, ArithmeticSelfdim2Fp16) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - // get the input from .bin - size_t input1_size, output_size; - std::string input1Ppath = "./test_data/in_arithmetic_selffp16.bin"; - std::string correctOutputPath = "./test_data/out_arithmetic_selffp16.bin"; - auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); - auto correctOutput = - reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - - MS_LOG(INFO) << " init tensors "; - - std::vector shape = {1, 512}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; - auto *input_tensor = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NC, tensor_type); - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NC, tensor_type); - if (input_tensor == nullptr || output_tensor == nullptr) { - MS_LOG(INFO) << " new input_tensor or output_tensor failed "; - return; - } - std::vector inputs{input_tensor}; - std::vector outputs{output_tensor}; - MS_LOG(INFO) << " initialize param "; - auto param = reinterpret_cast(malloc(sizeof(ArithmeticSelfParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ConcatParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->op_parameter_.type_ = schema::PrimitiveType_Sin; - auto *arithmeticself_kernel = - new (std::nothrow) kernel::ArithmeticSelfOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (arithmeticself_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::ArithmeticSelfOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; 
- } - delete param; - return; - } - arithmeticself_kernel->Init(); - // to do allocate memory for inputs and outputs - for (auto &input_tensor_ : inputs) { - input_tensor_->MallocData(allocator); - } - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{arithmeticself_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete arithmeticself_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, input1_size); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(input_data1, output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; + float input_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37, 0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37}; + float output_data[] = {1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::PrimitiveType_Round); + TestMain({{shape, input_data, VAR}}, {shape, output_data}, param, fp16_enable); } - delete sub_graph; } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc index 2f751d42cf..b2990d4e05 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc +++ 
b/mindspore/lite/test/ut/src/runtime/kernel/opencl/arithmetic_tests.cc @@ -13,176 +13,117 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/arithmetic_common.h" -namespace mindspore { -class TestArithmeticOpenCL : public mindspore::CommonTest { - public: - TestArithmeticOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseArithmetic(void *input_data0, const std::vector &input_shape, void *input_data1, - const std::vector &weight_shape, void *output_data, const std::vector &out_shape, - bool enable_fp16, int op_type, int act_type = schema::ActivationType_NO_ACTIVATION) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? 
sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - auto param = static_cast(malloc(sizeof(ArithmeticParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - int input0_size = std::accumulate(input_shape.begin(), input_shape.end(), 1LL, std::multiplies()); - int input1_size = std::accumulate(weight_shape.begin(), weight_shape.end(), 1LL, std::multiplies()); +class TestOpenCL_Arithmetic : public CommonTest {}; + +namespace { +// PrimitiveType_RealDiv +// PrimitiveType_LogicalAnd +// PrimitiveType_LogicalOr +// PrimitiveType_Equal +// PrimitiveType_Less +// PrimitiveType_Greater +// PrimitiveType_GreaterEqual +// PrimitiveType_NotEqual +// PrimitiveType_LessEqual +// PrimitiveType_Maximum +// PrimitiveType_Minimum +// PrimitiveType_FloorDiv +// PrimitiveType_FloorMod +// PrimitiveType_SquaredDifference: src/ops/populate/arithmetic_populate.cc +// PrimitiveType_Add: src/ops/populate/add_populate.cc +// PrimitiveType_Sub: src/ops/populate/sub_populate.cc +// PrimitiveType_Mul: src/ops/populate/mul_populate.cc +// PrimitiveType_Div: src/ops/populate/div_populate.cc +// PrimitiveType_Eltwise: src/ops/populate/eltwise_populate.cc +// PrimitiveType_BiasAdd: src/ops/populate/bias_add_populate.cc +OpParameter *CreateParameter(schema::PrimitiveType type, const std::vector &input0_shape, + const std::vector &input1_shape, + schema::ActivationType act_type = schema::ActivationType_NO_ACTIVATION) { + auto *param = test::CreateParameter(type); + int input0_size = std::accumulate(input0_shape.begin(), input0_shape.end(), 1, std::multiplies<>()); + int input1_size = std::accumulate(input1_shape.begin(), input1_shape.end(), 1, std::multiplies<>()); if (input0_size != input1_size) { param->broadcasting_ = true; } - param->op_parameter_.type_ = op_type; param->activation_type_ = act_type; - auto tensor_x_ptr = - std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), input_shape); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - - auto tensor_w_ptr = std::make_unique( - TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), weight_shape, schema::Format_NHWC, - input1_size != 1 ? lite::Tensor::Category::CONST_TENSOR : lite::Tensor::Category::CONST_SCALAR); - auto tensor_w = tensor_w_ptr.get(); - if (tensor_w == nullptr) { - MS_LOG(ERROR) << "tensor_w create error."; - return; - } - tensor_w->set_data(input_data1); - auto tensor_out_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), out_shape); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x, tensor_w}; - std::vector outputs{tensor_out}; - auto op_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (op_kernel == nullptr) { - MS_LOG(ERROR) << "op_kernel create error."; - return; - } - inputs[0]->MallocData(allocator); + return reinterpret_cast(param); +} +} // namespace - std::vector kernels{op_kernel}; +TEST_F(TestOpenCL_Arithmetic, ElementwiseAdd) { + std::vector input0_shape = {1, 2, 2, 3}; + std::vector input1_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 2, 2, 3}; + float input0_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float input1_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float output_data[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24}; - std::vector inputs_g{tensor_x}; - auto pGraph_ptr = std::make_unique(inputs_g, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data0, tensor_x->ElementsNum() * dtype_size); - pGraph->Run(); - if 
(enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-5)); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::PrimitiveType_Add, input0_shape, input1_shape); + TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, CONST_TENSOR}}, {output_shape, output_data}, + param, fp16_enable); } - - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } - MS_LOG(INFO) << "TestArithmetic passed"; } -TEST_F(TestArithmeticOpenCL, ArithmeticElementwiseAddFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector in_shape1 = {n, h, w, c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector weight_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector output_data = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, 16.0f, 18.0f, 20.0f, 22.0f, 24.0f}; - RunTestCaseArithmetic(input_data.data(), in_shape0, weight_data.data(), in_shape1, output_data.data(), out_shape, - false, schema::PrimitiveType_Add); +TEST_F(TestOpenCL_Arithmetic, ScalarMul) { + std::vector input0_shape = {1, 2, 2, 3}; + std::vector input1_shape = {1}; + std::vector output_shape = {1, 2, 2, 3}; + float input0_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float input1_data[] = {2}; + float output_data[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::PrimitiveType_Mul, input0_shape, input1_shape); + TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, CONST_TENSOR}}, {output_shape, output_data}, + param, fp16_enable); + } } 
-TEST_F(TestArithmeticOpenCL, ArithmeticScalarMulFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector in_shape1 = {1}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector weight_data = {2.0f}; - std::vector output_data = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, 14.0f, 16.0f, 18.0f, 20.0f, 22.0f, 24.0f}; - RunTestCaseArithmetic(input_data.data(), in_shape0, weight_data.data(), in_shape1, output_data.data(), out_shape, - false, schema::PrimitiveType_Mul); +TEST_F(TestOpenCL_Arithmetic, BroadcastSubReLU6) { + std::vector input0_shape = {1, 2, 2, 3}; + std::vector input1_shape = {3}; + std::vector output_shape = {1, 2, 2, 3}; + float input0_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float input1_data[] = {1, 2, 3}; + float output_data[] = {0, 0, 0, 3, 3, 3, 6, 6, 6, 6, 6, 6}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::PrimitiveType_Sub, input0_shape, input1_shape, schema::ActivationType_RELU6); + TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, CONST_TENSOR}}, {output_shape, output_data}, + param, fp16_enable); + } } -TEST_F(TestArithmeticOpenCL, ArithmeticBroadcastSubReLU6Fp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector in_shape1 = {c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector weight_data = {1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 0.0f, 0.0f, 3.0f, 3.0f, 3.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f, 6.0f}; - RunTestCaseArithmetic(input_data.data(), in_shape0, weight_data.data(), in_shape1, output_data.data(), out_shape, - false, schema::PrimitiveType_Sub, schema::ActivationType_RELU6); +TEST_F(TestOpenCL_Arithmetic, BroadcastSub2) { + std::vector 
input0_shape = {1, 3}; + std::vector input1_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 2, 2, 3}; + float input0_data[] = {1, 2, 3}; + float input1_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float output_data[] = {0, 0, 0, -3, -3, -3, -6, -6, -6, -9, -9, -9}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::PrimitiveType_Sub, input0_shape, input1_shape); + TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, CONST_TENSOR}}, {output_shape, output_data}, + param, fp16_enable); + } } -TEST_F(TestArithmeticOpenCL, ArithmeticBroadcastSub2Fp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, c}; - std::vector in_shape1 = {n, h, w, c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f}; - std::vector weight_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector output_data = {0.0f, 0.0f, 0.0f, -3.0f, -3.0f, -3.0f, -6.0f, -6.0f, -6.0f, -9.0f, -9.0f, -9.0f}; - RunTestCaseArithmetic(input_data.data(), in_shape0, weight_data.data(), in_shape1, output_data.data(), out_shape, - false, schema::PrimitiveType_Sub); +TEST_F(TestOpenCL_Arithmetic, ElementwiseDiv) { + std::vector input0_shape = {1, 2, 2, 3}; + std::vector input1_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 2, 2, 3}; + float input0_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float input1_data[] = {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}; + float output_data[] = {1, 2, 3, 2, 2.5, 3, 7, 8, 9, 5, 5.5, 6}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(schema::PrimitiveType_Div, input0_shape, input1_shape); + TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, CONST_TENSOR}}, {output_shape, output_data}, + param, fp16_enable); + } } -TEST_F(TestArithmeticOpenCL, ArithmeticElementwiseDivFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, 
w, c}; - std::vector in_shape1 = {n, h, w, c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector weight_data = {1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f}; - std::vector output_data = {1.0f, 2.0f, 3.0f, 2.0f, 2.5, 3.0f, 7.0f, 8.0f, 9.0f, 5.0f, 5.5, 6.0f}; - RunTestCaseArithmetic(input_data.data(), in_shape0, weight_data.data(), in_shape1, output_data.data(), out_shape, - true, schema::PrimitiveType_Div); -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batch_to_space_nd_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batch_to_space_nd_tests.cc index c8643c5176..984d24854f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batch_to_space_nd_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batch_to_space_nd_tests.cc @@ -13,100 +13,33 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h" +#include +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/batch_to_space.h" -namespace mindspore { -class TestBatchToSpaceNDOpenCL : public mindspore::CommonTest { - public: - TestBatchToSpaceNDOpenCL() {} -}; -template -void test_main_batch_to_space_nd(void *input_data, void *correct_data, const std::vector &input_shape, - BatchToSpaceParameter *param, TypeId data_type, schema::Format format) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime_wrap = lite::opencl::OpenCLRuntimeWrapper(); - auto ocl_runtime = ocl_runtime_wrap.GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); +namespace mindspore::lite::opencl::test { - std::vector output_shape = input_shape; - output_shape[0] = input_shape[0] / param->block_shape_[0] / param->block_shape_[1]; - output_shape[1] = input_shape[1] * param->block_shape_[0] - param->crops_[0] - param->crops_[1]; - output_shape[2] = input_shape[2] * param->block_shape_[1] - param->crops_[2] - param->crops_[3]; +class TestOpenCL_BatchToSpaceND : public CommonTest {}; - auto tensor_a = lite::Tensor(TypeId(data_type), input_shape, format); - auto tensor_c = lite::Tensor(TypeId(data_type), output_shape, format); - std::vector inputs{&tensor_a}; - std::vector outputs{&tensor_c}; - size_t input_size = tensor_a.Size(); - - auto *pkernel = - new (std::nothrow) kernel::BatchToSpaceNDOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (pkernel == nullptr) { - MS_LOG(INFO) << "new BatchToSpaceNDOpenCLKernel failed "; - return; - } - pkernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : 
inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{pkernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - delete pkernel; - MS_LOG(INFO) << " new SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - T *input_ptr = reinterpret_cast(inputs[0]->MutableData()); - memcpy(input_ptr, input_data, input_size); - std::cout << "==================input data================" << std::endl; - for (auto i = 0; i < inputs[0]->ElementsNum(); ++i) { - std::cout << input_ptr[i] << ", "; - } - std::cout << std::endl; - - sub_graph->Run(); - - auto *output_data = reinterpret_cast(outputs[0]->MutableData()); - std::cout << "==================output data================" << std::endl; - for (auto i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << output_data[i] << ", "; - } - std::cout << std::endl; - std::cout << "==================correct data================" << std::endl; - for (auto i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << static_cast(correct_data)[i] << ", "; - } - std::cout << std::endl; - CommonTest::CompareOutputData(output_data, static_cast(correct_data), outputs[0]->ElementsNum(), 0.0001); - delete sub_graph; +namespace { +// PrimitiveType_BatchToSpaceND: src/ops/populate/batch_to_space_populate.cc +OpParameter *CreateParameter(int block_shape[], int crops[], const std::vector &input_shape, + std::vector *output_shape) { + auto *param = test::CreateParameter(schema::PrimitiveType_BatchToSpaceND); + memcpy(param->block_shape_, block_shape, sizeof(param->block_shape_)); + memcpy(param->crops_, crops, sizeof(param->crops_)); + *output_shape = {input_shape[0] / param->block_shape_[0] / param->block_shape_[1], + input_shape[1] * param->block_shape_[0] - param->crops_[0] - param->crops_[1], + input_shape[2] * param->block_shape_[1] - 
param->crops_[2] - param->crops_[3], input_shape[3]}; + return reinterpret_cast(param); } -TEST_F(TestBatchToSpaceNDOpenCL, NHWC4H2W2Pad2020) { - std::vector input_shape{4, 5, 5, 4}; - BatchToSpaceParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; - } - param->block_shape_[0] = 2; - param->block_shape_[1] = 2; - param->crops_[0] = 2; - param->crops_[1] = 0; - param->crops_[2] = 2; - param->crops_[3] = 0; +} // namespace + +TEST_F(TestOpenCL_BatchToSpaceND, H2W2Pad2020) { + std::vector input_shape = {4, 5, 5, 4}; + int block_shape[] = {2, 2}; + int crops[] = {2, 0, 2, 0}; + std::vector output_shape; float input_data[] = { 172, 47, 117, 192, 67, 251, 195, 103, 9, 211, 21, 242, 36, 87, 70, 216, 88, 140, 58, 193, 230, 39, 87, 174, 88, 81, 165, 25, 77, 72, 9, 148, 115, 208, 243, 197, 254, 79, 175, 192, 82, 99, 216, 177, 243, 29, @@ -125,9 +58,8 @@ TEST_F(TestBatchToSpaceNDOpenCL, NHWC4H2W2Pad2020) { 131, 46, 218, 178, 108, 3, 31, 9, 138, 27, 173, 199, 167, 61, 85, 97, 44, 34, 162, 88, 33, 133, 232, 36, 0, 203, 34, 197, 126, 181, 254, 80, 190, 136, 189, 129, 209, 112, 35, 120, 91, 168, 116, 36, 176, 25, 67, 103, 252, 35, 114, 30, 29, 241, 33, 146, 17, 221, 84, 253, 2, 69, 101, 140, 44, 117, 253, 66, 111, - 91, 85, 167, 39, 203, 150, 158, 145, 198, - }; - float correct_data[] = { + 91, 85, 167, 39, 203, 150, 158, 145, 198}; + float output_data[] = { 88, 81, 165, 25, 85, 48, 49, 69, 77, 72, 9, 148, 169, 163, 192, 95, 115, 208, 243, 197, 197, 94, 0, 113, 254, 79, 175, 192, 178, 36, 162, 48, 237, 139, 252, 86, 218, 178, 108, 3, 205, 121, 109, 75, 31, 9, 138, 27, 184, 16, 152, 157, 173, 199, 167, 61, 149, 110, 25, 208, 85, 97, 44, 34, 243, 29, @@ -140,22 +72,18 @@ TEST_F(TestBatchToSpaceNDOpenCL, NHWC4H2W2Pad2020) { 32, 182, 35, 102, 119, 11, 128, 38, 19, 174, 174, 82, 91, 128, 42, 115, 184, 188, 142, 99, 53, 140, 232, 77, 30, 24, 230, 35, 214, 254, 101, 140, 44, 117, 189, 197, 215, 43, 253, 66, 111, 91, 32, 11, 104, 212, 85, 167, 39, 203, 
138, 182, 235, 165, 150, 158, 145, 198}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_batch_to_space_nd(input_data, correct_data, input_shape, param, data_type, format); -} -TEST_F(TestBatchToSpaceNDOpenCL, NHWC4H3W3Pad0101) { - std::vector input_shape{9, 3, 3, 4}; - BatchToSpaceParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_shape, crops, input_shape, &output_shape); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - param->block_shape_[0] = 3; - param->block_shape_[1] = 3; - param->crops_[0] = 0; - param->crops_[1] = 1; - param->crops_[2] = 0; - param->crops_[3] = 1; +} + +TEST_F(TestOpenCL_BatchToSpaceND, H3W3Pad0101) { + std::vector input_shape = {9, 3, 3, 4}; + int block_shape[] = {3, 3}; + int crops[] = {0, 1, 0, 1}; + std::vector output_shape; float input_data[] = { 172, 47, 117, 192, 67, 251, 195, 103, 9, 211, 21, 242, 36, 87, 70, 216, 88, 140, 58, 193, 230, 39, 87, 174, 88, 81, 165, 25, 77, 72, 9, 148, 115, 208, 243, 197, 254, 79, 175, 192, 82, 99, 216, 177, @@ -172,7 +100,7 @@ TEST_F(TestBatchToSpaceNDOpenCL, NHWC4H3W3Pad0101) { 182, 207, 11, 166, 111, 93, 249, 129, 223, 118, 44, 216, 125, 24, 67, 210, 239, 3, 234, 204, 230, 35, 214, 254, 189, 197, 215, 43, 32, 11, 104, 212, 138, 182, 235, 165, 125, 156, 111, 232, 2, 27, 211, 217, 151, 53, 51, 174, 148, 181, 29, 67, 35, 39, 137, 73, 41, 151, 131, 46}; - float correct_data[] = { + float output_data[] = { 172, 47, 117, 192, 254, 79, 175, 192, 38, 232, 244, 17, 67, 251, 195, 103, 82, 99, 216, 177, 79, 132, 105, 42, 9, 211, 21, 242, 243, 29, 147, 147, 127, 244, 131, 204, 205, 112, 231, 149, 43, 104, 11, 2, 100, 180, 232, 78, 201, 127, 0, 138, 51, 80, 32, 182, 143, 148, 227, 186, 114, 43, 186, 127, 180, 67, @@ -185,40 +113,11 @@ TEST_F(TestBatchToSpaceNDOpenCL, NHWC4H3W3Pad0101) { 203, 114, 
142, 99, 53, 140, 77, 72, 9, 148, 183, 28, 34, 128, 121, 170, 84, 203, 115, 208, 243, 197, 128, 164, 53, 133, 197, 94, 0, 113, 227, 148, 209, 50, 226, 107, 13, 112, 178, 36, 162, 48, 155, 14, 41, 58, 40, 72, 19, 95, 93, 131, 98, 42, 193, 36, 10, 86}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_batch_to_space_nd(input_data, correct_data, input_shape, param, data_type, format); -} -TEST_F(TestBatchToSpaceNDOpenCL, NC4HW4H2W2Pad2222) { - std::vector input_shape{4, 5, 5, 4}; - BatchToSpaceParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_shape, crops, input_shape, &output_shape); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - param->block_shape_[0] = 2; - param->block_shape_[1] = 2; - param->crops_[0] = 2; - param->crops_[1] = 2; - param->crops_[2] = 2; - param->crops_[3] = 2; - float input_data[] = {172, 47, 117, 192, 67, 251, 195, 103, 9, 211, 21, 242, 36, 87, 70, 216, 88, 140, - 58, 193, 230, 39, 87, 174, 88, 81, 165, 25, 77, 72, 9, 148, 115, 208, 243, 197, - 254, 79, 175, 192, 82, 99, 216, 177, 243, 29, 147, 147, 142, 167, 32, 193, 9, 185, - 127, 32, 31, 202, 244, 151, 163, 254, 203, 114, 183, 28, 34, 128, 128, 164, 53, 133, - 38, 232, 244, 17, 79, 132, 105, 42, 186, 31, 120, 1, 65, 231, 169, 57, 35, 102, - 119, 11, 174, 82, 91, 128, 142, 99, 53, 140, 121, 170, 84, 203, 68, 6, 196, 47, - 127, 244, 131, 204, 100, 180, 232, 78, 143, 148, 227, 186, 23, 207, 141, 117, 85, 48, - 49, 69, 169, 163, 192, 95, 197, 94, 0, 113, 178, 36, 162, 48, 93, 131, 98, 42}; - float correct_data[] = {88, 81, 165, 25, 85, 48, 49, 69, 77, 72, 9, 148, 169, 163, 192, 95, 115, 208, - 243, 197, 197, 94, 0, 113, 237, 139, 252, 86, 218, 178, 108, 3, 205, 121, 109, 75, - 31, 9, 138, 27, 184, 16, 152, 157, 173, 199, 167, 61, 243, 29, 147, 147, 205, 112, - 231, 149, 142, 167, 
32, 193, 201, 127, 0, 138, 9, 185, 127, 32, 114, 43, 186, 127, - 189, 83, 161, 104, 232, 36, 0, 203, 160, 228, 251, 251, 34, 197, 126, 181, 121, 70, - 213, 31, 254, 80, 190, 136, 183, 28, 34, 128, 123, 195, 82, 174, 128, 164, 53, 133, - 227, 148, 209, 50, 38, 232, 244, 17, 155, 14, 41, 58, 182, 207, 11, 166, 116, 36, - 176, 25, 111, 93, 249, 129, 67, 103, 252, 35, 223, 118, 44, 216, 114, 30, 29, 241}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NCHW; - test_main_batch_to_space_nd(input_data, correct_data, input_shape, param, data_type, format); } -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc index 644380e851..8cfb153530 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/batchnorm_tests.cc @@ -13,380 +13,50 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/batchnorm_parameter.h" -namespace mindspore { -class TestBatchnormOpenCLfp32 : public mindspore::CommonTest { - public: - TestBatchnormOpenCLfp32() {} -}; -class TestBatchnormOpenCLfp16 : public mindspore::CommonTest { - public: - TestBatchnormOpenCLfp16() {} -}; -class TestBatchnormOpenCLCI : public mindspore::CommonTest { - public: - TestBatchnormOpenCLCI() {} -}; +namespace mindspore::lite::opencl::test { -TEST_F(TestBatchnormOpenCLCI, Batchnormfp32CI) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); +class TestOpenCL_BatchNorm : public CommonTest {}; - MS_LOG(INFO) << " Read tensors from .bin "; +namespace { +// PrimitiveType_BatchNorm: src/ops/populate/batch_norm_populate.cc +OpParameter *CreateParameter(float epsilon) { + auto *param = test::CreateParameter(schema::PrimitiveType_BatchNorm); + param->epsilon_ = epsilon; + return reinterpret_cast(param); +} +} // namespace + +TEST_F(TestOpenCL_BatchNorm, test0) { std::vector input_shape = {1, 2, 2, 8}; + std::vector weight_shape = {1, 1, 1, input_shape[3]}; std::vector output_shape = {1, 2, 2, 8}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - float input_data[] = {2.471454, -2.1379554, -0.0904604, 1.2928944, -0.19215967, -0.8677279, -0.12759617, 1.2242758, -0.06398406, -0.4041858, 0.20352598, -2.067808, 0.52113044, -1.567617, 0.28003863, 0.41367245, 0.77298605, 0.29908583, 1.4015813, 1.330567, 1.760135, 0.6320845, 
0.6995399, -1.208123, -1.9738104, -1.3283046, 1.022744, 0.02741058, 0.84505165, -0.89434445, 1.983211, -0.5485428}; - float correct_data[] = {0.7505676, 0.515882, 0.26147857, 1.6026789, 0.47575232, 0.50116986, 0.33589783, - 1.4884706, 0.56019205, 0.7832671, 0.53893626, -0.5093127, 0.71395767, 0.18509413, - 0.33990562, 0.891792, 0.6230367, 0.89172685, 1.6696336, 1.6263539, 1.1277269, - 1.1784974, 0.34403008, -0.3019984, 0.4167911, 0.6407478, 1.3120956, 0.80740136, - 0.8221321, 0.4891496, 0.3566509, 0.18351318}; - float mean_data[] = {0.3016613, -0.89284, 0.63434774, 0.145766, 0.73353934, -0.6744012, 0.7087985, -0.02967937}; - float var_data[] = {2.5604038, 0.84985304, 0.36261332, 1.9083935, 0.4920925, 0.6476224, 0.6269014, 0.8567283}; float scale_data[] = {0.1201471, 0.142174, 0.5683258, 0.86815494, 0.23426804, 0.3634345, 0.0077846, 0.6813278}; float offset_data[] = {0.58764684, 0.70790595, 0.945536, 0.8817803, 0.78489226, 0.5884778, 0.3441211, 0.5654443}; - - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_mean = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_var = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_scale = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_offset = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr || tensor_mean == nullptr || tensor_var == nullptr || tensor_scale == nullptr || - tensor_offset == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - 
if (output_tensor == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - delete tensor_data; - delete tensor_mean; - delete tensor_var; - delete tensor_scale; - delete tensor_offset; - return; - } - std::vector inputs = {tensor_data, tensor_scale, tensor_offset, tensor_mean, tensor_var}; - std::vector outputs{output_tensor}; - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(BatchNormParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new BatchNormParameter failed "; - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->epsilon_ = pow(10, -5); - auto *batchnorm_kernel = - new (std::nothrow) kernel::BatchNormOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (batchnorm_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::BatchNorm_kernel failed "; - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - batchnorm_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{batchnorm_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete batchnorm_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, sizeof(input_data)); - memcpy(inputs[1]->data_c(), scale_data, sizeof(scale_data)); - memcpy(inputs[2]->data_c(), offset_data, sizeof(offset_data)); - memcpy(inputs[3]->data_c(), mean_data, sizeof(mean_data)); - memcpy(inputs[4]->data_c(), var_data, sizeof(var_data)); - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - - auto *output_data_gpu = 
reinterpret_cast(output_tensor->data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; -} - -TEST_F(TestBatchnormOpenCLfp16, Batchnormfp16input_dim4) { - MS_LOG(INFO) << "begin test"; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 256, 256, 48}; - std::vector output_shape = {1, 256, 256, 48}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - // get the input from .bin - size_t input_size, output_size; - std::string input_path = "./test_data/batchnorm_in_datafp16.bin"; - std::string mean_path = "./test_data/batchnorm_meanfp16.bin"; - std::string var_path = "./test_data/batchnorm_varfp16.bin"; - std::string offset_path = "./test_data/batchnorm_offsetfp16.bin"; - std::string scale_path = "./test_data/batchnorm_scalefp16.bin"; - std::string output_path = "./test_data/batchnorm_correctdatafp16.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - size_t mean_size, var_size, scale_size, offset_size; - auto mean_data = reinterpret_cast(mindspore::lite::ReadFile(mean_path.c_str(), &mean_size)); - auto var_data = reinterpret_cast(mindspore::lite::ReadFile(var_path.c_str(), &var_size)); - auto scale_data = reinterpret_cast(mindspore::lite::ReadFile(scale_path.c_str(), &scale_size)); - auto offset_data = reinterpret_cast(mindspore::lite::ReadFile(offset_path.c_str(), &offset_size)); - - MS_LOG(INFO) << " construct tensors "; - 
lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_mean = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_var = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_scale = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_offset = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr || tensor_mean == nullptr || tensor_var == nullptr || tensor_scale == nullptr || - tensor_offset == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - delete tensor_data; - delete tensor_mean; - delete tensor_var; - delete tensor_scale; - delete tensor_offset; - return; - } - std::vector inputs = {tensor_data, tensor_scale, tensor_offset, tensor_mean, tensor_var}; - std::vector outputs{output_tensor}; - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(BatchNormParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new BatchNormParameter failed "; - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->epsilon_ = pow(10, -5); - auto *batchnorm_kernel = - new (std::nothrow) kernel::BatchNormOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (batchnorm_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::BatchNorm_kernel failed "; - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - batchnorm_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto 
&input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{batchnorm_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete batchnorm_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, input_size); - memcpy(inputs[1]->data_c(), scale_data, scale_size); - memcpy(inputs[2]->data_c(), offset_data, offset_size); - memcpy(inputs[3]->data_c(), mean_data, mean_size); - memcpy(inputs[4]->data_c(), var_data, var_size); - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.01)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; + float mean_data[] = {0.3016613, -0.89284, 0.63434774, 0.145766, 0.73353934, -0.6744012, 0.7087985, -0.02967937}; + float var_data[] = {2.5604038, 0.84985304, 0.36261332, 1.9083935, 0.4920925, 0.6476224, 0.6269014, 0.8567283}; + float output_data[] = {0.7505676, 0.515882, 0.26147857, 1.6026789, 0.47575232, 0.50116986, 0.33589783, + 1.4884706, 0.56019205, 0.7832671, 0.53893626, -0.5093127, 0.71395767, 0.18509413, + 0.33990562, 0.891792, 0.6230367, 0.89172685, 1.6696336, 1.6263539, 1.1277269, + 1.1784974, 0.34403008, -0.3019984, 0.4167911, 0.6407478, 1.3120956, 0.80740136, + 0.8221321, 0.4891496, 0.3566509, 0.18351318}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(1e-5); + TestMain({{input_shape, input_data, VAR}, + 
{weight_shape, scale_data, VAR}, + {weight_shape, offset_data, VAR}, + {weight_shape, mean_data, VAR}, + {weight_shape, var_data, VAR}}, + {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-3 : 1e-5); } - delete sub_graph; } -TEST_F(TestBatchnormOpenCLfp32, Batchnormfp32input_dim4) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 256, 256, 47}; - std::vector output_shape = {1, 256, 256, 47}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - // get the input from .bin - size_t input_size, output_size; - std::string input_path = "./test_data/batchnorm_in_datafp32.bin"; - std::string mean_path = "./test_data/batchnorm_meanfp32.bin"; - std::string var_path = "./test_data/batchnorm_varfp32.bin"; - std::string offset_path = "./test_data/batchnorm_offsetfp32.bin"; - std::string scale_path = "./test_data/batchnorm_scalefp32.bin"; - std::string output_path = "./test_data/batchnorm_out_datafp32.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - size_t mean_size, var_size, scale_size, offset_size; - auto mean_data = reinterpret_cast(mindspore::lite::ReadFile(mean_path.c_str(), &mean_size)); - auto var_data = reinterpret_cast(mindspore::lite::ReadFile(var_path.c_str(), &var_size)); - auto scale_data = reinterpret_cast(mindspore::lite::ReadFile(scale_path.c_str(), &scale_size)); - auto offset_data = reinterpret_cast(mindspore::lite::ReadFile(offset_path.c_str(), &offset_size)); - - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - lite::Tensor 
*tensor_mean = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_var = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_scale = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - lite::Tensor *tensor_offset = - new (std::nothrow) lite::Tensor(data_type, {1, 1, 1, input_shape[3]}, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr || tensor_mean == nullptr || tensor_var == nullptr || tensor_scale == nullptr || - tensor_offset == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - delete tensor_data; - delete tensor_mean; - delete tensor_var; - delete tensor_scale; - delete tensor_offset; - return; - } - std::vector inputs = {tensor_data, tensor_scale, tensor_offset, tensor_mean, tensor_var}; - std::vector outputs{output_tensor}; - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(BatchNormParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new BatchNormParameter failed "; - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->epsilon_ = pow(10, -5); - auto *batchnorm_kernel = - new (std::nothrow) kernel::BatchNormOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (batchnorm_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::BatchNorm_kernel failed "; - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - batchnorm_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector 
kernels{batchnorm_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete batchnorm_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, input_size); - memcpy(inputs[1]->data_c(), scale_data, scale_size); - memcpy(inputs[2]->data_c(), offset_data, offset_size); - memcpy(inputs[3]->data_c(), mean_data, mean_size); - memcpy(inputs[4]->data_c(), var_data, var_size); - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc index 932e0660c1..45935df3a9 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/biasadd_tests.cc @@ -29,7 +29,10 @@ using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; namespace mindspore { -class TestBiasAddOpenCL : public mindspore::CommonTest {}; + +// PrimitiveType_BiasAdd: src/ops/populate/bias_add_populate.cc + +class TestBiasAddOpenCL : public CommonTest {}; void LoadDataBiasAdd(void *dst, size_t dst_size, const std::string &file_path) { if (file_path.empty()) { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc 
b/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc index 2f30341eb0..556d99e17f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/cast_tests.cc @@ -22,8 +22,10 @@ #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" #include "mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h" -namespace mindspore { -class TestCastSelfOpenCL : public mindspore::CommonTest { +// PrimitiveType_Cast: src/ops/populate/cast_populate.cc + +namespace mindspore::lite::opencl::test { +class TestCastSelfOpenCL : public CommonTest { public: TestCastSelfOpenCL() {} }; @@ -208,4 +210,4 @@ TEST_F(TestCastSelfOpenCL, Castfp16tofp32) { } delete sub_graph; } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc new file mode 100644 index 0000000000..a2f14be286 --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.cc @@ -0,0 +1,166 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include <algorithm>
+#include <set>
+#include "ut/src/runtime/kernel/opencl/common.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
+#include "nnacl/conv_parameter.h"
+
+using mindspore::kernel::LiteKernel;
+using mindspore::kernel::SubGraphOpenCLKernel;
+using mindspore::lite::KernelRegistry;
+using mindspore::schema::Format::Format_NHWC;
+
+namespace mindspore::lite::opencl::test {
+
+// Builds a single-kernel OpenCL sub graph for `op_parameter`, runs it on the given inputs and compares the
+// result with the expected output.
+//
+// input_infos:  one tuple per input tensor: (shape, host data, category, data type). Tensors whose category is
+//               VAR are fed as sub graph inputs; all others are treated as weights.
+// output_info:  (output shape, expected output data).
+// op_parameter: operator parameter; it is free()d here when the kernel cannot be created.
+//               NOTE(review): after a successful creation it is presumably owned by the kernel - confirm.
+// fp16_enable:  forwarded to OpenCLRuntime::SetFp16Enable().
+// atol / rtol / print_data: forwarded to CompareOutput().
+//
+// The weight buffers referenced by input_infos are overwritten while the graph runs and restored before returning.
+void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std::vector<int>, float *> output_info,
+              OpParameter *op_parameter, bool fp16_enable, float atol, float rtol, bool print_data) {
+  if (op_parameter == nullptr) {
+    std::cerr << "op_parameter is nullptr" << std::endl;
+    FAIL();
+  }
+  if (input_infos.empty()) {  // kernel_inputs.front() below needs at least one input
+    std::cerr << "input_infos is empty" << std::endl;
+    free(op_parameter);
+    FAIL();
+  }
+  auto primitive_type = static_cast<schema::PrimitiveType>(op_parameter->type_);
+  static const std::set<schema::PrimitiveType> packed_op = {
+    schema::PrimitiveType_Conv2D, schema::PrimitiveType_DeConv2D, schema::PrimitiveType_DepthwiseConv2D,
+    schema::PrimitiveType_DeDepthwiseConv2D, schema::PrimitiveType_MatMul};
+
+  // simulating benchmark: session::LiteSession::CreateSession() -> session->Init()
+  MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator";
+  auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
+  auto ocl_runtime = runtime_wrapper.GetInstance();
+  ocl_runtime->SetFp16Enable(fp16_enable);
+  EXPECT_TRUE(ocl_runtime->Init() == RET_OK);
+
+  // simulating benchmark: session_->CompileGraph() -> ConvertTensors()
+  MS_LOG(DEBUG) << "create Tensors & init weight data";
+  std::vector<Tensor> tensors;
+  // firstly, create all Tensors
+  // reserve() up front: pointers into `tensors` are taken below, so the vector must never reallocate
+  tensors.reserve(input_infos.size());
+  for (const auto &input_info : input_infos) {
+    const auto &shape = std::get<0>(input_info);
+    auto category = std::get<2>(input_info);
+    auto data_type = std::get<3>(input_info);
+    tensors.emplace_back(data_type, shape, Format_NHWC, category);
+  }
+  // secondly, init weight Tensor's data
+  std::vector<Tensor *> kernel_inputs;
+  std::vector<Tensor *> subgraph_inputs;
+  std::map<Tensor *, float *> subgraph_inputs_data;
+  for (size_t i = 0; i < tensors.size(); ++i) {
+    auto *tensor = &tensors[i];
+    auto *input_data = std::get<1>(input_infos[i]);
+    kernel_inputs.push_back(tensor);
+    if (tensor->category() != VAR) {  // tensor is weight
+      // simulating src/lite_session.cc:WeightTensorNeedCopy()
+      if (packed_op.count(primitive_type)) {
+        tensor->set_data(input_data);
+      } else {
+        memcpy(tensor->MutableData(), input_data, tensor->Size());
+      }
+    } else {
+      EXPECT_TRUE(tensor->data_type() == kNumberTypeFloat32);
+      subgraph_inputs.push_back(tensor);
+      subgraph_inputs_data[tensor] = reinterpret_cast<float *>(input_data);
+    }
+  }
+
+  const std::vector<int> &output_shape = std::get<0>(output_info);
+  float *expect_data = std::get<1>(output_info);
+  auto output = Tensor(kNumberTypeFloat32, output_shape, Format_NHWC, VAR);
+
+  // simulating benchmark: session_->CompileGraph() -> scheduler.Schedule() -> BuildKernels()
+  MS_LOG(DEBUG) << "create OpenCLKernel";
+  kernel::KernelKey key{kernel::kGPU, kernel_inputs.front()->data_type(), primitive_type};
+  auto creator = KernelRegistry::GetInstance()->GetCreator(key);
+  if (creator == nullptr) {
+    std::cerr << "can't get registry function for: " << schema::EnumNamePrimitiveType(primitive_type)
+              << ". Maybe you forget setting op_parameter_.type_ for OpParameter." << std::endl;
+    free(op_parameter);
+    FAIL();
+  }
+  auto *kernel = creator(kernel_inputs, {&output}, op_parameter, nullptr, key, nullptr);
+  if (kernel == nullptr) {
+    std::cerr << "call registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl;
+    free(op_parameter);
+    FAIL();
+  }
+  kernel->set_name(schema::EnumNamesPrimitiveType()[primitive_type]);
+
+  // simulating benchmark: session_->CompileGraph() -> scheduler.Schedule() -> ConstructSubGraphs()
+  MS_LOG(DEBUG) << "create SubGraph";
+  std::vector<LiteKernel *> kernels{kernel};
+  auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels);
+  if (sub_graph == nullptr) {
+    // no sub graph exists to take the kernel over, so release it here and report the failure
+    // instead of letting the test pass silently
+    std::cerr << "new SubGraphOpenCLKernel failed" << std::endl;
+    delete kernel;
+    FAIL();
+  }
+
+  // simulating benchmark: session_->CompileGraph() -> PrepareKernels() -> SubGraphOpenCLKernel.Prepare()
+  MS_LOG(DEBUG) << "call sub_graph->Prepare()";
+  EXPECT_TRUE(sub_graph->Prepare() == RET_OK);  // will set Tensor's allocator be OpenCLAllocator
+
+  // simulating benchmark: model->Free(), clear weight data in input_infos
+  std::vector<std::unique_ptr<uint8_t[]>> saved_weights;
+  srand(time(nullptr));  // seed once, not once per weight tensor
+  for (size_t i = 0; i < tensors.size(); ++i) {
+    auto *tensor = &tensors[i];
+    if (tensor->category() != VAR) {
+      saved_weights.emplace_back(new uint8_t[tensor->Size()]);
+      auto *weight_data = std::get<1>(input_infos[i]);
+      memcpy(saved_weights.back().get(), weight_data, tensor->Size());
+      // overwrite the caller's buffer so a kernel that still reads it after Prepare() produces wrong results
+      memset(weight_data, rand(), tensor->Size());
+    }
+  }
+
+  // simulating benchmark: LoadInput()
+  MS_LOG(DEBUG) << "malloc and init input data";
+  for (auto input : subgraph_inputs) {
+    EXPECT_TRUE(input->MutableData() != nullptr);  // malloc Image2D & call MapBuffer()
+    memcpy(input->data_c(), subgraph_inputs_data[input], input->Size());
+  }
+
+  // simulating benchmark: MarkAccuracy() -> session_->RunGraph() -> executor_->Run() -> SubGraphOpenCLKernel->Run()
+  MS_LOG(DEBUG) << "run SubGraph & compare result";
+  EXPECT_TRUE(sub_graph->Run() == RET_OK);  // will call UnmapBuffer() for input
+
+  // check result
+  ocl_runtime->GetAllocator()->MapBuffer(output.data_c(), CL_MAP_READ, nullptr, true);
+  CompareOutput<float>(output.data_c(), expect_data, output.ElementsNum(), atol, rtol, print_data);
+  ocl_runtime->GetAllocator()->UnmapBuffer(output.data_c());
+
+  MS_LOG(DEBUG) << "release resources";
+  for (auto &tensor : tensors) {
+    // packed ops borrowed the caller's buffer through set_data(); detach it from the Tensor again
+    if (tensor.category() != VAR && packed_op.count(primitive_type)) {
+      tensor.set_data(nullptr);
+    }
+  }
+  for (size_t i = 0, j = 0; i < tensors.size(); ++i) {  // resume weight data to input_infos
+    auto *tensor = &tensors[i];
+    if (tensor->category() != VAR) {
+      auto *weight_data = std::get<1>(input_infos[i]);
+      memcpy(weight_data, saved_weights[j++].get(), tensor->Size());
+    }
+  }
+  delete sub_graph;
+}
+
+// Convenience overload for inputs without an explicit data type: every input is tagged kNumberTypeFloat32 and
+// the call is forwarded to the ArgsTupleWithDtype overload.
+void TestMain(const std::vector<ArgsTuple> &input_infos, std::tuple<std::vector<int>, float *> output_info,
+              OpParameter *op_parameter, bool fp16_enable, float atol, float rtol, bool print_data) {
+  std::vector<ArgsTupleWithDtype> input_infos_new;
+  input_infos_new.reserve(input_infos.size());
+  auto transform_fun = [](const ArgsTuple &in) -> ArgsTupleWithDtype {
+    return ArgsTupleWithDtype(std::get<0>(in), std::get<1>(in), std::get<2>(in), kNumberTypeFloat32);
+  };
+  std::transform(input_infos.begin(), input_infos.end(), std::back_inserter(input_infos_new), transform_fun);
+  TestMain(input_infos_new, output_info, op_parameter, fp16_enable, atol, rtol, print_data);
+}
+
+}  // namespace mindspore::lite::opencl::test
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.h b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.h
new file mode 100644
index 0000000000..75cc0186d3
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/common.h
@@ -0,0 +1,102 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_OPENCL_COMMON_H_ +#define MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_OPENCL_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include "nnacl/op_base.h" +#include "ir/dtype/type_id.h" +#include "src/tensor.h" +#include "src/common/file_utils.h" +#include "common/common_test.h" + +using Tensor = mindspore::lite::Tensor; +using ArgsTuple = std::tuple, void *, Tensor::Category>; +using ArgsTupleWithDtype = std::tuple, void *, Tensor::Category, mindspore::TypeId>; +constexpr Tensor::Category VAR = Tensor::VAR; +constexpr Tensor::Category CONST_TENSOR = Tensor::Category::CONST_TENSOR; +constexpr Tensor::Category CONST_SCALAR = Tensor::Category::CONST_SCALAR; + +namespace mindspore::lite::opencl::test { + +template +void CompareOutput(void *output, void *expect, size_t elem_num, T atol, float rtol = 1e-9, bool print_data = false) { + T *output_data = reinterpret_cast(output); + T *expect_data = reinterpret_cast(expect); + + if (print_data) { + for (int i = 0; i < elem_num; ++i) { + printf("%d: expect=%.3f output=%.3f\n", i, expect_data[i], output_data[i]); + } + } + + int mismatch_num = 0; + int first_err_idx = -1; + for (int i = 0; i < elem_num; ++i) { + auto delta = static_cast(std::fabs(output_data[i] - expect_data[i])); + auto tolerance = static_cast(atol + rtol * std::fabs(expect_data[i])); + if (delta > tolerance) { + mismatch_num++; + if (first_err_idx == -1) { + first_err_idx = i; + } + } + } + if (mismatch_num > 0) { + printf("(mismatch %4.1f%%)\n", 100 * 
static_cast(mismatch_num) / elem_num); + printf("Not equal to tolerance atol=%.0e, rtol=%.0e\n", atol, rtol); + printf("first error at idx=%d expect=%.1f output=%.1f\n", first_err_idx, expect_data[first_err_idx], + output_data[first_err_idx]); + FAIL(); + } +} + +template +void CompareOutput(Tensor *output_tensor, const std::string &file_path, float atol, float rtol = 1e-9) { + size_t output_size; + auto expect_data = lite::ReadFile(file_path.c_str(), &output_size); + CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol); +} + +template +T *CreateParameter(schema::PrimitiveType type) { + auto *param = static_cast(malloc(sizeof(T))); + if (param == nullptr) { + MS_LOG(ERROR) << std::string("create Parameter failed for ") + schema::EnumNamePrimitiveType(type) << std::endl; + return nullptr; + } + memset(param, 0, sizeof(T)); + param->op_parameter_.type_ = type; + return param; +} + +void TestMain(const std::vector &input_infos, std::tuple, float *> output_info, + OpParameter *op_parameter, bool fp16_enable = false, float atol = 1e-9, float rtol = 1e-9, + bool print_output = false); + +void TestMain(const std::vector &input_infos, std::tuple, float *> output_info, + OpParameter *op_parameter, bool fp16_enable = false, float atol = 1e-9, float rtol = 1e-9, + bool print_output = false); + +} // namespace mindspore::lite::opencl::test + +#endif // MINDSPORE_LITE_TEST_UT_SRC_RUNTIME_KERNEL_OPENCL_COMMON_H_ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc index b2337d7dea..d2ec96c396 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/concat_tests.cc @@ -13,513 +13,35 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/concat_parameter.h" -namespace mindspore { -class TestConcatOpenCLfp32 : public mindspore::CommonTest { - public: - TestConcatOpenCLfp32() {} -}; -class TestConcatOpenCLfp16 : public mindspore::CommonTest { - public: - TestConcatOpenCLfp16() {} -}; +namespace mindspore::lite::opencl::test { -class TestConcatOpenCLCI : public mindspore::CommonTest { - public: - TestConcatOpenCLCI() {} -}; +class TestOpenCL_Concat : public CommonTest {}; -TEST_F(TestConcatOpenCLCI, ConcatFp32_2inputforCI) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " init tensors "; - constexpr int INPUT_NUM = 2; - std::array, INPUT_NUM> input_shapes = {std::vector{1, 1, 1, 8}, std::vector{1, 1, 1, 8}}; - std::vector output_shape = {2, 1, 1, 8}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - float input_data1[] = {0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f}; - float input_data2[] = {0.5f, 0.6f, 0.74f, 0.23f, 0.46f, 0.69f, 0.13f, 0.47f}; - float correctOutput[] = {0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f, - 0.5f, 0.6f, 0.74f, 0.23f, 0.46f, 0.69f, 0.13f, 0.47f}; - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " new output_tensor failed "; - return; - } - std::vector inputs; - std::vector outputs{output_tensor}; - for (auto &shape : input_shapes) { - 
auto input_temp = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - inputs.push_back(input_temp); - if (input_temp == nullptr) { - MS_LOG(INFO) << " new input_tensor failed "; - return; - } - } - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(ConcatParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ConcatParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->axis_ = 0; - auto *concat_kernel = - new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (concat_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::ConcatOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - concat_kernel->Init(); - // to do allocate memory for inputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{concat_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete concat_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001)); - for (auto tensor : inputs) { - 
tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; +namespace { +// PrimitiveType_Concat: src/ops/populate/concat_populate.cc +OpParameter *CreateParameter(int axis) { + auto *param = test::CreateParameter(schema::PrimitiveType_Concat); + param->axis_ = axis; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestConcatOpenCLfp16, ConcatFp16_4input_dim4_axis1) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - // get the input from .bin - size_t input1_size, input2_size, input3_size, input4_size, output_size; - std::string input1Ppath = "./test_data/concatfp16_input1.bin"; - std::string input2Ppath = "./test_data/concatfp16_input2.bin"; - std::string input3Ppath = "./test_data/concatfp16_input3.bin"; - std::string input4Ppath = "./test_data/concatfp16_input4.bin"; - std::string correctOutputPath = "./test_data/concatfp16_output.bin"; - auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); - auto input_data2 = reinterpret_cast(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); - auto input_data3 = reinterpret_cast(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size)); - auto input_data4 = reinterpret_cast(mindspore::lite::ReadFile(input4Ppath.c_str(), &input4_size)); - auto correctOutput = - reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - - MS_LOG(INFO) << " init tensors "; - constexpr int INPUT_NUM = 4; - std::array, INPUT_NUM> input_shapes = { - std::vector{1, 19, 19, 96}, std::vector{1, 19, 19, 96}, std::vector{1, 19, 19, 96}, - std::vector{1, 19, 19, 96}}; - std::vector output_shape = {1, 76, 19, 96}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; 
- std::vector inputs; - for (auto &shape : input_shapes) { - auto input_temp = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - inputs.push_back(input_temp); - if (input_temp == nullptr) { - MS_LOG(INFO) << " new input_tensor failed "; - return; - } - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " new output_tensor failed "; - for (auto tensor : inputs) { - delete tensor; - } - return; - } - std::vector outputs{output_tensor}; - MS_LOG(INFO) << " input_shapes size =: " << input_shapes.size(); - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(ConcatParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ConcatParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->axis_ = 1; - auto *concat_kernel = - new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (concat_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::ConcatOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - concat_kernel->Init(); - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{concat_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete concat_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; 
- if (inputs.size() == 2) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - } else if (inputs.size() == 3) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - memcpy(inputs[2]->data_c(), input_data3, input3_size); - } else if (inputs.size() == 4) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - memcpy(inputs[2]->data_c(), input_data3, input3_size); - memcpy(inputs[3]->data_c(), input_data4, input4_size); - } else { - MS_LOG(ERROR) << " input size must be 2 or 3 or 4"; - } - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; -} - -TEST_F(TestConcatOpenCLfp32, ConcatFp32_3input_dim4_axis1) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - // get the input from .bin - size_t input1_size, input2_size, input3_size, output_size; - std::string input1Ppath = "./test_data/concatfp32_input1.bin"; - std::string input2Ppath = "./test_data/concatfp32_input2.bin"; - std::string input3Ppath = "./test_data/concatfp32_input3.bin"; - std::string correctOutputPath = "./test_data/concatfp32_output.bin"; - auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); - auto input_data2 = reinterpret_cast(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); - auto input_data3 = 
reinterpret_cast(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size)); - auto correctOutput = reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - - MS_LOG(INFO) << " init tensors "; - constexpr int INPUT_NUM = 3; - std::array, INPUT_NUM> input_shapes = { - std::vector{1, 16, 256, 80}, std::vector{1, 16, 256, 80}, std::vector{1, 16, 256, 80}}; - std::vector output_shape = {1, 48, 256, 80}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - std::vector inputs; - for (auto &shape : input_shapes) { - auto input_temp = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - inputs.push_back(input_temp); - if (input_temp == nullptr) { - MS_LOG(INFO) << " new input_tensor failed "; - return; - } - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " new output_tensor failed "; - for (auto tensor : inputs) { - delete tensor; - } - return; - } - std::vector outputs{output_tensor}; - MS_LOG(INFO) << " input_shapes size=: " << input_shapes.size(); - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(ConcatParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ConcatParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->axis_ = 1; - auto *concat_kernel = - new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (concat_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::ConcatOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - concat_kernel->Init(); - // to do allocate memory for inputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } 
- - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{concat_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete concat_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - if (inputs.size() == 2) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - } else if (inputs.size() == 3) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - memcpy(inputs[2]->data_c(), input_data3, input3_size); - } else { - MS_LOG(ERROR) << " input size must be 2 or 3 "; - } - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; -} - -TEST_F(TestConcatOpenCLfp16, ConcatFp16_6input_dim4_axis1) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - // get the input from .bin - size_t input1_size, input2_size, input3_size, input4_size, input5_size, input6_size, output_size; - std::string input1Ppath = "./test_data/concatfp16_input1.bin"; - std::string input2Ppath = "./test_data/concatfp16_input2.bin"; - std::string input3Ppath = "./test_data/concatfp16_input3.bin"; - 
std::string input4Ppath = "./test_data/concatfp16_input4.bin"; - std::string input5Ppath = "./test_data/concatfp16_input5.bin"; - std::string input6Ppath = "./test_data/concatfp16_input6.bin"; - std::string correctOutputPath = "./test_data/concatfp16_output.bin"; - auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); - auto input_data2 = reinterpret_cast(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); - auto input_data3 = reinterpret_cast(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size)); - auto input_data4 = reinterpret_cast(mindspore::lite::ReadFile(input4Ppath.c_str(), &input4_size)); - auto input_data5 = reinterpret_cast(mindspore::lite::ReadFile(input5Ppath.c_str(), &input5_size)); - auto input_data6 = reinterpret_cast(mindspore::lite::ReadFile(input6Ppath.c_str(), &input6_size)); - auto correctOutput = - reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - - MS_LOG(INFO) << " init tensors "; - constexpr int INPUT_NUM = 6; - std::array, INPUT_NUM> input_shapes = { - std::vector{1, 1200, 3, 4}, std::vector{1, 600, 3, 4}, std::vector{1, 150, 3, 4}, - std::vector{1, 50, 3, 4}, std::vector{1, 30, 3, 4}, std::vector{1, 4, 3, 4}}; - std::vector output_shape = {1, 2034, 3, 4}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; - std::vector inputs; - for (auto &shape : input_shapes) { - auto input_temp = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - inputs.push_back(input_temp); - if (input_temp == nullptr) { - MS_LOG(INFO) << " new input_tensor failed "; - return; - } - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " new output_tensor failed "; - for (auto tensor : inputs) { - delete tensor; - } - return; - } - std::vector outputs{output_tensor}; - MS_LOG(INFO) 
<< " input_shapes size =: " << input_shapes.size(); - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(ConcatParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ConcatParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->axis_ = 1; - auto *concat_kernel = - new (std::nothrow) kernel::ConcatOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (concat_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::ConcatOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - concat_kernel->Init(); - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{concat_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete concat_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - if (inputs.size() == 2) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - } else if (inputs.size() == 3) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - memcpy(inputs[2]->data_c(), input_data3, input3_size); - } else if (inputs.size() == 4) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - memcpy(inputs[2]->data_c(), input_data3, input3_size); - memcpy(inputs[3]->data_c(), input_data4, 
input4_size); - } else if (inputs.size() == 6) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - memcpy(inputs[2]->data_c(), input_data3, input3_size); - memcpy(inputs[3]->data_c(), input_data4, input4_size); - memcpy(inputs[4]->data_c(), input_data5, input5_size); - memcpy(inputs[5]->data_c(), input_data6, input6_size); - } else { - MS_LOG(ERROR) << " input size must be 2 or 3 or 4"; - } - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; +TEST_F(TestOpenCL_Concat, input2_axis0) { + std::vector input0_shape = {1, 1, 1, 8}; + std::vector input1_shape = {1, 1, 1, 8}; + std::vector output_shape = {2, 1, 1, 8}; + int axis = 0; + float input0_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37}; + float input1_data[] = {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47}; + float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37, 0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain({{input0_shape, input0_data, VAR}, {input1_shape, input1_data, VAR}}, {output_shape, output_data}, param, + fp16_enable, fp16_enable ? 
1e-3 : 1e-9); } - delete sub_graph; } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_tests.cc new file mode 100644 index 0000000000..cb9fdec16d --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_tests.cc @@ -0,0 +1,272 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/conv_parameter.h" + +namespace mindspore::lite::opencl::test { + +class TestOpenCL_Conv2D : public CommonTest {}; + +namespace { +// PrimitiveType_Conv2D: src/ops/populate/conv2d_populate.cc +ConvParameter *CreateParameter(const std::string &attr, ActType act_type) { + auto *param = test::CreateParameter(schema::PrimitiveType_Conv2D); + param->act_type_ = act_type; + sscanf(attr.c_str(), + "inputNHWC_%dx%dx%dx%d_outputNHWC_%dx%dx%dx%d_kernelHW_%dx%d_strideHW_%dx%d_padTopBottomLeftRight_%dx%dx%dx%d_" + "dilationHW_%dx%d", + &param->input_batch_, &param->input_h_, &param->input_w_, &param->input_channel_, &param->output_batch_, + &param->output_h_, &param->output_w_, &param->output_channel_, &param->kernel_h_, &param->kernel_w_, + &param->stride_h_, &param->stride_w_, &param->pad_u_, &param->pad_d_, &param->pad_l_, &param->pad_r_, + &param->dilation_h_, &param->dilation_w_); + return param; +} +} // namespace + +void TestMain_Conv2D(const std::string &attr, float *input_data, float *weight_data, float *bias_data, + float *output_data, ActType act_type, bool fp16_enable, float atol = 1e-9) { + auto *param = CreateParameter(attr, act_type); + std::vector input_shape = {param->input_batch_, param->input_h_, param->input_w_, param->input_channel_}; + std::vector weight_shape = {param->output_channel_, param->kernel_h_, param->kernel_w_, param->input_channel_}; + std::vector bias_shape = {param->output_channel_}; + std::vector output_shape = {param->output_batch_, param->output_h_, param->output_w_, param->output_channel_}; + std::vector input_infos = {{input_shape, input_data, VAR}, {weight_shape, weight_data, CONST_TENSOR}}; + if (bias_data) { + input_infos.emplace_back(bias_shape, bias_data, CONST_TENSOR); + } + TestMain(input_infos, {output_shape, output_data}, reinterpret_cast(param), fp16_enable, atol); +} + +TEST_F(TestOpenCL_Conv2D, test0) { + std::string attr = +
"inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; + std::vector input_shape, weight_shape, bias_shape, output_shape; + float input_data[] = {0, 1, 2, 3, 4, 5, -6, -7}; + float weight_data[] = {1, 1, 1, 1, 1, 1, 1, 1}; + float bias_data[] = {0, 0}; + + float output_data[] = {1, 1, 5, 5, 9, 9, -13, -13}; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, false, 1e-3f); + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, true, 1e-6f); + + float output_data_relu[] = {1, 1, 5, 5, 9, 9, 0, 0}; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data_relu, ActType_Relu, false, 1e-3f); + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data_relu, ActType_Relu, true, 1e-6f); + + float output_data_relu6[] = {1, 1, 5, 5, 6, 6, 0, 0}; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data_relu6, ActType_Relu6, false, 1e-3f); + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data_relu6, ActType_Relu6, true, 1e-6f); +} + +TEST_F(TestOpenCL_Conv2D, test0_no_bias) { + std::string attr = + "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7}; + float weight_data[] = {1, 1, 1, 1, 1, 1, 1, 1}; + float output_data[] = {1, 1, 5, 5, 9, 9, 13, 13}; + TestMain_Conv2D(attr, input_data, weight_data, nullptr, output_data, ActType_No, false, 1e-3f); + TestMain_Conv2D(attr, input_data, weight_data, nullptr, output_data, ActType_No, true, 1e-6f); +} + +TEST_F(TestOpenCL_Conv2D, test1) { + std::string attr = + "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7}; + float weight_data[] = {1, 2, 3, 4, 5, 6, 7, 8}; + float bias_data[] = {0.5, -0.5}; + float output_data[] = {2.5, 3.5, 8.5, 17.5, 14.5, 
31.5, 20.5, 45.5}; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, false, 1e-3f); + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, true, 1e-6f); +} + +TEST_F(TestOpenCL_Conv2D, test2) { + std::string attr = + "inputNHWC_1x2x2x2_outputNHWC_1x2x2x1_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7}; + float weight_data[] = {1, 1, 1, 1, 1, 1, 1, 1}; + float bias_data[] = {0}; + float output_data[] = {28, 18, 22, 13}; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, false, 1e-3f); + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, true, 1e-6f); +} + +TEST_F(TestOpenCL_Conv2D, test3) { + std::string attr = + "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7}; + float weight_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + float bias_data[] = {0.5, -0.5}; + float output_data[] = {168.5, 391.5, 80.5, 223.5, 60.5, 235.5, 20.5, 123.5}; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, false, 1e-3f); + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, true, 1e-6f); +} + +TEST_F(TestOpenCL_Conv2D, test3_batch2) { + std::string attr = + "inputNHWC_2x2x2x2_outputNHWC_2x2x2x2_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7}; + float weight_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + float bias_data[] = {0.5, -0.5}; + float output_data[] = {168.5, 391.5, 80.5, 223.5, 60.5, 235.5, 20.5, 123.5, + 168.5, 391.5, 80.5, 223.5, 60.5, 235.5, 20.5, 123.5}; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, false, 1e-3f); + 
TestMain_Conv2D(attr, input_data, weight_data, bias_data, output_data, ActType_No, true, 1e-6f); +} + +TEST_F(TestOpenCL_Conv2D, test4) { + std::vector, std::vector, std::vector, + std::vector, ActType>> + cases = { + {"SimpleTestFloat32WithAnisotropicStrides", + "inputNHWC_1x3x6x1_outputNHWC_1x2x2x1_kernelHW_2x2_strideHW_1x3_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {3, 2, 1, -1, -2, -3, 4, 3, 2, -2, -3, -4, 5, 4, 3, -3, -4, -5}, + {1, 2, 3, 4}, + {-1}, + {30, -24, 40, -34}, + ActType_No}, + {"SimpleTestFloat32", + "inputNHWC_2x2x4x1_outputNHWC_2x1x2x3_kernelHW_2x2_strideHW_2x2_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 3, 4, 1, 2, 3, 4}, + {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}, + {1, 2, 3}, + {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}, + ActType_No}, + {"SimpleTestFloat32SingleThreaded", + "inputNHWC_2x2x4x1_outputNHWC_2x1x2x3_kernelHW_2x2_strideHW_2x2_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 3, 4, 1, 2, 3, 4}, + {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}, + {1, 2, 3}, + {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}, + ActType_No}, + {"SimpleTestFloat32WithChannels", + "inputNHWC_2x2x4x2_outputNHWC_2x1x2x3_kernelHW_2x2_strideHW_2x2_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1, 1, 1, 1, 1, + 0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2, 0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2}, + {1, 1, 2, 2, 3, 3, 4, 4, -1, -1, 1, 1, -1, -1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1}, + {1, 2, 3}, + {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}, + ActType_No}, + {"InputAndweightSameWidthHeight", + "inputNHWC_2x2x4x1_outputNHWC_2x1x1x1_kernelHW_2x4_strideHW_2x2_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 3, 4, 1, 2, 3, 4}, + {1, 2, 3, 4, -1, -1, 1, 1}, + {0}, + {10, 34}, + ActType_No}, + {"ActivationRelu6Test", + "inputNHWC_2x2x4x1_outputNHWC_2x1x2x3_kernelHW_2x2_strideHW_2x2_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {1, 1, 1, 1, 2, 2, 2, 2, 1, 
2, 3, 4, 1, 2, 3, 4}, + {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}, + {1, 2, 3}, + {6, 2, 5, 6, 2, 5, 6, 4, 3, 6, 4, 3}, + ActType_Relu6}, + {"StrideTest", + "inputNHWC_2x2x4x1_outputNHWC_2x1x3x3_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {1, 1, 1, 1, 2, 2, 3, 2, 1, 2, 3, 4, 1, 2, 4, 4}, + {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}, + {1, 2, 3}, + {18, 2, 5, 22, 3, 6, 21, 1, 6, 17, 4, 3, 31, 5, 4, 40, 3, 4}, + ActType_No}, + {"PaddingTest", + "inputNHWC_1x2x4x1_outputNHWC_1x2x4x3_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1", + {1, 1, 1, 1, 2, 2, 3, 2}, + {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}, + {1, 2, 3}, + {18, 2, 5, 22, 3, 6, 21, 1, 6, 8, -1, 4, 7, 2, -1, 9, 3, -2, 8, 1, -2, 3, 0, 1}, + ActType_No}, + {"PointwiseFloat32", + "inputNHWC_2x2x4x2_outputNHWC_2x2x4x1_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 1, 1, 1, 1, 1, 1, + 0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2, 0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2}, + {1, 2}, + {0}, + {1.5, 1.5, 1.5, 1.5, 3, 3, 3, 3, 1.5, 3, 4.5, 6, 1.5, 3, 4.5, 6}, + ActType_No}, + {"SimpleTestFloat32WithAnisotropicStrides", + "inputNHWC_1x3x6x1_outputNHWC_1x2x2x1_kernelHW_2x2_strideHW_1x3_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {3, 2, 1, -1, -2, -3, 4, 3, 2, -2, -3, -4, 5, 4, 3, -3, -4, -5}, + {1, 2, 3, 4}, + {-1}, + {30, -24, 40, -34}, + ActType_No}, + {"HandCalculatedFloat32", + "inputNHWC_1x3x4x1_outputNHWC_1x3x4x1_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_1x1x1x1_dilationHW_1x1", + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 4, 7, 2, 5, 8, 3, 6, 9}, + {0}, + {105, 150, 183, 95, 235, 312, 357, 178, 187, 234, 261, 121}, + ActType_No}, + {"HandCalculatedFloat32WithConstweight", + "inputNHWC_1x3x4x1_outputNHWC_1x3x4x1_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_1x1x1x1_dilationHW_1x1", + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 4, 7, 2, 5, 8, 3, 6, 9}, + {0}, + {105, 150, 183, 95, 235, 312, 357, 
178, 187, 234, 261, 121}, + ActType_No}, + {"HandCalculatedWithBiasFloat32", + "inputNHWC_1x3x4x1_outputNHWC_1x3x4x1_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_1x1x1x1_dilationHW_1x1", + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 4, 7, 2, 5, 8, 3, 6, 9}, + {10}, + {115, 160, 193, 105, 245, 322, 367, 188, 197, 244, 271, 131}, + ActType_No}, + {"HandCalculatedWithReluFloat32", + "inputNHWC_1x3x4x1_outputNHWC_1x3x4x1_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_1x1x1x1_dilationHW_1x1", + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 4, 7, 2, 5, 8, 3, 6, 9}, + {-200}, + {0, 0, 0, 0, 35, 112, 157, 0, 0, 34, 61, 0}, + ActType_Relu}, + {"HandCalculatedValidFloat32", + "inputNHWC_1x3x4x1_outputNHWC_1x1x2x1_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 4, 7, 2, 5, 8, 3, 6, 9}, + {0}, + {312, 357}, + ActType_No}, + {"SimpleTestFloatWithDilation", + "inputNHWC_1x9x9x1_outputNHWC_1x3x3x1_kernelHW_3x3_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_3x3", + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 2, 3, 4, 5, 6, 7, 8, 9}, + {0}, + {5, 5, 5, 5, 5, 5, 5, 5, 5}, + ActType_No}, + {"SimpleTestQuantizedOutputMultiplierGreaterThan1", + "inputNHWC_2x2x4x1_outputNHWC_2x1x2x3_kernelHW_2x2_strideHW_2x2_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1", + {1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 3, 4, 1, 2, 3, 4}, + {1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1}, + {1, 2, 3}, + {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3}, + ActType_No}, + }; + + for (auto &case_ : cases) { + auto &name = std::get<0>(case_); + auto &attr = std::get<1>(case_); + auto input_data = std::get<2>(case_).data(); + auto weight_data = std::get<3>(case_).data(); + auto bias_data = std::get<4>(case_).data(); + auto expect_data = 
std::get<5>(case_).data(); + auto act_type = std::get<6>(case_); + std::cout << name << std::endl; + TestMain_Conv2D(attr, input_data, weight_data, bias_data, expect_data, act_type, false); + TestMain_Conv2D(attr, input_data, weight_data, bias_data, expect_data, act_type, true); + } +} + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc index 0f82879c47..cff19fb617 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/conv2d_transpose_tests.cc @@ -13,161 +13,64 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/conv_parameter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h" -#include "src/common/log_adapter.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +namespace mindspore::lite::opencl::test { -namespace mindspore { -class TestConv2dTransposeOpenCL : public mindspore::CommonTest { - public: - TestConv2dTransposeOpenCL() {} -}; +class TestOpenCL_Conv2dTranspose : public CommonTest {}; + +namespace { +// PrimitiveType_DeConv2D: src/ops/populate/deconv2d_populate.cc +OpParameter *CreateParameter(int n, int h, int w, int ci, int co, int kh, int kw, int pad, + std::vector *input_shape, std::vector *weight_shape, + std::vector *bias_shape, std::vector *output_shape) { + auto *param = test::CreateParameter(schema::PrimitiveType_DeConv2D); + param->kernel_h_ = kh; + param->kernel_w_ = kw; + 
param->stride_h_ = 2; + param->stride_w_ = 2; + param->pad_u_ = pad; + param->pad_d_ = pad; + param->pad_l_ = pad; + param->pad_r_ = pad; + param->dilation_h_ = 1; + param->dilation_w_ = 1; + param->act_type_ = ActType_No; -void RunTestCaseConv2dTranspose(const std::vector &shape, void *input_data, void *weight_data, void *bias_data, - void *output_data, bool enable_fp16) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - int pad = shape[0]; - int n = shape[1]; - int h = shape[2]; - int w = shape[3]; - int kh = shape[4]; - int kw = shape[5]; - int ci = shape[6]; - int co = shape[7]; int oh = 2 * h - 1 + 2 * (kh - 1 - pad) - kh + 1; int ow = 2 * w - 1 + 2 * (kw - 1 - pad) - kw + 1; - std::vector input_shape = {n, h, w, ci}; - auto tensor_x_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), input_shape); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - - std::vector weight_shape = {co, kh, kw, ci}; - auto tensor_w_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), weight_shape); - auto tensor_w = tensor_w_ptr.get(); - if (tensor_w == nullptr) { - MS_LOG(ERROR) << "tensor_w create error."; - return; - } - tensor_w->set_data(weight_data); - - std::vector bias_shape = {co}; - auto tensor_bias_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), bias_shape); - auto tensor_bias = tensor_bias_ptr.get(); - if (tensor_bias == nullptr) { - MS_LOG(ERROR) << "tensor_bias create error."; - return; - } - tensor_bias->set_data(bias_data); - - std::vector out_shape = {1, oh, ow, co}; - auto tensor_out_ptr = - std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), out_shape); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x, tensor_w, tensor_bias}; - std::vector outputs{tensor_out}; - auto opParameter = static_cast(malloc(sizeof(ConvParameter))); - if (opParameter == nullptr) { - MS_LOG(ERROR) << "opParameter create error."; - return; - } - opParameter->kernel_h_ = kh; - opParameter->kernel_w_ = kw; - opParameter->stride_h_ = 2; - opParameter->stride_w_ = 2; - opParameter->pad_u_ = pad; - opParameter->pad_l_ = pad; - opParameter->input_channel_ = ci; - opParameter->output_channel_ = co; - auto op_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(opParameter), nullptr, kernel::KernelKey(), nullptr); - if (op_kernel == nullptr) { - MS_LOG(ERROR) << "op_kernel create error."; - return; - } - op_kernel->set_name("DeConv"); - - inputs[0]->MallocData(allocator); - std::vector kernels{op_kernel}; - std::vector inputs_g{tensor_x}; - auto pGraph_ptr = std::make_unique(inputs_g, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, n * h * w * ci * dtype_size); - pGraph->Run(); - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, n * oh * ow * co, static_cast(1e-3), 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, n * oh * ow * co, static_cast(1e-5)); - } - - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } + *input_shape = {n, h, w, ci}; + *weight_shape = {co, kh, kw, ci}; + *bias_shape = {co}; + *output_shape = {1, oh, ow, co}; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp32) { - int pad = 0; +TEST_F(TestOpenCL_Conv2dTranspose, 
test0) { int n = 1; int h = 2; int w = 2; - int kh = 2; - int kw = 2; int ci = 2; int co = 1; - std::vector shape = {pad, n, h, w, kh, kw, ci, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - std::vector weight_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; - std::vector bias_data = {0.5f}; - std::vector output_data = {5.5f, 6.5f, 17.5f, 22.5f, 7.5f, 8.5f, 27.5f, 32.5f, - 29.5f, 38.5f, 41.5f, 54.5f, 47.5f, 56.5f, 67.5f, 80.5f}; - RunTestCaseConv2dTranspose(shape, input_data.data(), weight_data.data(), bias_data.data(), output_data.data(), false); -} - -TEST_F(TestConv2dTransposeOpenCL, Conv2dTransposeFp16) { - int pad = 0; - int n = 1; - int h = 2; - int w = 2; int kh = 2; int kw = 2; - int ci = 2; - int co = 1; - std::vector shape = {pad, n, h, w, kh, kw, ci, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - std::vector weight_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; - std::vector bias_data = {0.5f}; - std::vector output_data = {5.5f, 6.5f, 17.5f, 22.5f, 7.5f, 8.5f, 27.5f, 32.5f, - 29.5f, 38.5f, 41.5f, 54.5f, 47.5f, 56.5f, 67.5f, 80.5f}; + int pad = 0; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7}; + float weight_data[] = {1, 2, 3, 4, 5, 6, 7, 8}; + float bias_data[] = {0.5}; + float output_data[] = {5.5, 6.5, 17.5, 22.5, 7.5, 8.5, 27.5, 32.5, 29.5, 38.5, 41.5, 54.5, 47.5, 56.5, 67.5, 80.5}; - RunTestCaseConv2dTranspose(shape, input_data.data(), weight_data.data(), bias_data.data(), output_data.data(), true); + for (auto fp16_enable : {false, true}) { + std::vector input_shape, weight_shape, bias_shape, output_shape; + auto *param = + CreateParameter(n, h, w, ci, co, kh, kw, pad, &input_shape, &weight_shape, &bias_shape, &output_shape); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, weight_data, CONST_TENSOR}, + {bias_shape, bias_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); + } } -} // namespace mindspore + +} // namespace 
mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc deleted file mode 100644 index 3a0f10b389..0000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/convolution_tests.cc +++ /dev/null @@ -1,233 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.h" -#include "nnacl/pack.h" - -using mindspore::kernel::ConvolutionOpenCLKernel; -using mindspore::kernel::LiteKernel; -using mindspore::kernel::SubGraphOpenCLKernel; -using mindspore::lite::Tensor; -using mindspore::schema::Format; -using mindspore::schema::NodeType_ValueNode; -using mindspore::schema::Format::Format_KHWC; -using mindspore::schema::Format::Format_NHWC; - -namespace mindspore { - -class TestConvolutionOpenCL : public mindspore::CommonTest {}; - -void LoadData(Tensor *tensor, const float *src) { - if (tensor->data_type() == kNumberTypeFloat16) { - auto num = tensor->Size() / sizeof(float16_t); - auto tensor_data = reinterpret_cast(tensor->data_c()); - for (int i = 0; i < num; ++i) { - tensor_data[i] = 
static_cast(src[i]); - } - } else { - memcpy(tensor->data_c(), src, tensor->Size()); - } -} - -void CompareOutput(Tensor *output, const float *expect_data, const float atol) { - auto num = output->Size() / (output->data_type() == kNumberTypeFloat16 ? 2 : 4); - std::vector output_data(num); - if (output->data_type() == kNumberTypeFloat16) { - for (int i = 0; i < output_data.size(); ++i) { - output_data[i] = static_cast(reinterpret_cast(output->data_c())[i]); - } - } else { - memcpy(output_data.data(), output->data_c(), output->Size()); - } - - printf("output:"); - for (int i = 0; i < std::min(10, output->ElementsNum()); i++) { - printf("%7.3f ", output_data[i]); - } - printf("\n"); - - bool not_equal = false; - int idx = 0; - std::array idx_4d{}; - auto N = output->Batch(), H = output->Height(), W = output->Width(), C = output->Channel(); - for (int i = 0, cn = 0; i < N; ++i) { - for (int j = 0; j < H; ++j) { - for (int k = 0; k < W; ++k) { - for (int l = 0; l < C; ++l) { - auto err = std::fabs(output_data[cn] - expect_data[cn]); - if (err > atol) { - not_equal = true; - idx_4d = {i, j, k, l}; - goto End; - } - cn++; - } - } - } - } - -End: - if (not_equal) { - printf("first error at [%d %d %d %d] expect=%.3f output=%.3f\n", idx_4d[0], idx_4d[1], idx_4d[2], idx_4d[3], - expect_data[idx], output_data[idx]); - FAIL(); - } else { - printf("COMPARE SUCCESS!\n\n"); - } -} - -void TEST_MAIN(const std::string &attr, const TypeId data_type, const float atol, const float *input_data, - const float *weight_data, const float *bias_data, const float *expect_data) { - auto param = static_cast(malloc(sizeof(ConvParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "ConvParameter create error."; - return; - } - sscanf(attr.c_str(), - "inputNHWC_%dx%dx%dx%d_outputNHWC_%dx%dx%dx%d_kernelHW_%dx%d_strideHW_%dx%d_padTopBottomLeftRight_%dx%dx%dx%d_" - "dilationHW_%dx%d", - ¶m->input_batch_, ¶m->input_h_, ¶m->input_w_, ¶m->input_channel_, ¶m->output_batch_, - ¶m->output_h_, 
¶m->output_w_, ¶m->output_channel_, ¶m->kernel_h_, ¶m->kernel_w_, - ¶m->stride_h_, ¶m->stride_w_, ¶m->pad_u_, ¶m->pad_d_, ¶m->pad_l_, ¶m->pad_r_, - ¶m->dilation_h_, ¶m->dilation_w_); - - MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator"; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto ocl_runtime = runtime_wrapper.GetInstance(); - ocl_runtime->Init(); - ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(DEBUG) << "create Tensors"; - std::vector input_shape = {param->input_batch_, param->input_h_, param->input_w_, param->input_channel_}; - std::vector weight_shape = {param->output_channel_, param->kernel_h_, param->kernel_w_, param->input_channel_}; - std::vector bias_shape = {param->output_channel_}; - std::vector output_shape = {param->output_batch_, param->output_h_, param->output_w_, param->output_channel_}; - auto input = Tensor(data_type, input_shape, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto weight = Tensor(data_type, weight_shape, Format_KHWC, lite::Tensor::CONST_TENSOR); - auto bias = Tensor(data_type, bias_shape, Format_KHWC, lite::Tensor::CONST_TENSOR); - auto output = Tensor(data_type, output_shape, Format_NHWC, lite::Tensor::CONST_TENSOR); - - MS_LOG(DEBUG) << "allocate memory and initialize weight/bias"; - weight.MallocData(); - LoadData(&weight, weight_data); - if (bias_data) { - bias.MallocData(); - LoadData(&bias, bias_data); - } - - MS_LOG(DEBUG) << "create OpenCL Kernel"; - std::vector inputs{&input, &weight}; - if (bias_data) { - inputs.push_back(&bias); - } - std::vector outputs{&output}; - auto kernel = std::make_unique(reinterpret_cast(param), inputs, outputs); - kernel->Init(); - - MS_LOG(DEBUG) << "create SubGraph"; - std::vector kernels{kernel.release()}; - auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel({&input}, {&output}, kernels, kernels, kernels); - if (sub_graph == nullptr) { - return; - } - 
input.MallocData(allocator); - sub_graph->Init(); - LoadData(&input, input_data); - sub_graph->Run(); - CompareOutput(&output, expect_data, atol); - - MS_LOG(DEBUG) << "release resources"; - weight.FreeData(); - if (bias_data) { - bias.FreeData(); - } - delete sub_graph; -} - -TEST_F(TestConvolutionOpenCL, test0) { - std::string attr = - "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; - float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - float weight_data[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - float bias_data[] = {0.0f, 0.0f}; - float expect_data[] = {1.0f, 1.0f, 5.0f, 5.0f, 9.0f, 9.0f, 13.0f, 13.0f}; - TEST_MAIN(attr, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); -} - -TEST_F(TestConvolutionOpenCL, test0_no_bias) { - std::string attr = - "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; - float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - float weight_data[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - float expect_data[] = {1.0f, 1.0f, 5.0f, 5.0f, 9.0f, 9.0f, 13.0f, 13.0f}; - TEST_MAIN(attr, kNumberTypeFloat32, 1e-3f, input_data, weight_data, nullptr, expect_data); - TEST_MAIN(attr, kNumberTypeFloat16, 1e-6f, input_data, weight_data, nullptr, expect_data); -} - -TEST_F(TestConvolutionOpenCL, test1) { - std::string attr = - "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_1x1_strideHW_1x1_padTopBottomLeftRight_0x0x0x0_dilationHW_1x1"; - float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - float weight_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; - float bias_data[] = {0.5f, -0.5f}; - float expect_data[] = {2.5f, 3.5f, 8.5f, 17.5f, 14.5f, 31.5f, 20.5f, 45.5f}; - TEST_MAIN(attr, kNumberTypeFloat32, 1e-3f, input_data, 
weight_data, bias_data, expect_data); - TEST_MAIN(attr, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); -} - -TEST_F(TestConvolutionOpenCL, test2) { - std::string attr = - "inputNHWC_1x2x2x2_outputNHWC_1x2x2x1_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; - float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - float weight_data[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - float bias_data[] = {0.0f}; - float expect_data[] = {28.0f, 18.0f, 22.0f, 13.0f}; - TEST_MAIN(attr, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); -} - -TEST_F(TestConvolutionOpenCL, test3) { - std::string attr = - "inputNHWC_1x2x2x2_outputNHWC_1x2x2x2_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; - float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - float weight_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; - float bias_data[] = {0.5f, -0.5f}; - float expect_data[] = {168.5f, 391.5f, 80.5f, 223.5f, 60.5f, 235.5f, 20.5f, 123.5f}; - TEST_MAIN(attr, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); -} - -TEST_F(TestConvolutionOpenCL, test3_batch2) { - std::string attr = - "inputNHWC_2x2x2x2_outputNHWC_2x2x2x2_kernelHW_2x2_strideHW_1x1_padTopBottomLeftRight_0x1x0x1_dilationHW_1x1"; - float input_data[] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - float weight_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; - float bias_data[] = {0.5f, -0.5f}; - float expect_data[] = {168.5f, 391.5f, 80.5f, 223.5f, 60.5f, 235.5f, 20.5f, 123.5f, - 
168.5f, 391.5f, 80.5f, 223.5f, 60.5f, 235.5f, 20.5f, 123.5f}; - TEST_MAIN(attr, kNumberTypeFloat32, 1e-3f, input_data, weight_data, bias_data, expect_data); - TEST_MAIN(attr, kNumberTypeFloat16, 1e-6f, input_data, weight_data, bias_data, expect_data); -} - -} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc index 3081929a76..4cd1c22236 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/depthwise_conv2d_tests.cc @@ -13,154 +13,51 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "nnacl/pack.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h" - -namespace mindspore { -class TestConvolutionDwOpenCL : public mindspore::CommonTest { - public: - TestConvolutionDwOpenCL() {} -}; - -template -void DepthWiseTestMain(ConvParameter *conv_param, T2 *input_data, T1 *weight_data, T2 *gnd_data, schema::Format format, - TypeId dtype = kNumberTypeFloat32, bool is_compare = true, T2 err_max = 1e-5) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - if (dtype == kNumberTypeFloat16) { - ocl_runtime->SetFp16Enable(true); - } - - // pack input - int input_size = conv_param->input_channel_ * conv_param->input_h_ * conv_param->input_w_; - std::function to_dtype = [](T2 x) -> T2 { return x; }; - - // pack weight - int pack_weight_size = 
conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_; - T1 *packed_weight = weight_data; - - // T1 bias_data[] = {0.31856894, 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.0, 0.0, 0.0}; - T1 bias_data[] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; - size_t output_size = - conv_param->output_batch_ * conv_param->output_channel_ * conv_param->output_h_ * conv_param->output_w_; - - std::vector shape_filter = {1, conv_param->kernel_h_, conv_param->kernel_w_, conv_param->output_channel_}; - std::vector shape_bias = {conv_param->output_channel_}; - std::vector shape_out; - std::vector shape_in; - if (format == schema::Format_NHWC || format == schema::Format_NHWC4 || format == schema::Format_NC4HW4) { - shape_in = std::vector( - {conv_param->input_batch_, conv_param->input_h_, conv_param->input_w_, conv_param->input_channel_}); - shape_out = std::vector( - {conv_param->output_batch_, conv_param->output_h_, conv_param->output_w_, conv_param->output_channel_}); - } else if (format == schema::Format_NCHW) { - shape_in = std::vector( - {conv_param->input_batch_, conv_param->input_channel_, conv_param->input_h_, conv_param->input_w_}); - shape_out = std::vector( - {conv_param->output_batch_, conv_param->output_channel_, conv_param->output_h_, conv_param->output_w_}); - } else { - MS_LOG(ERROR) << "Unsupported format: " << format; - return; - } - auto tensor_a = lite::Tensor(TypeId(dtype), shape_in, format); - auto tensor_b = lite::Tensor(TypeId(dtype), shape_filter, schema::Format_NHWC); - auto tensor_c = lite::Tensor(TypeId(dtype), shape_bias, schema::Format_NHWC); - auto tensor_d = lite::Tensor(TypeId(dtype), shape_out, format); - std::vector inputs{&tensor_a, &tensor_b, &tensor_c}; - std::vector outputs{&tensor_d}; - - // freamework to do!!! 
- inputs[1]->set_data(packed_weight); - inputs[2]->set_data(bias_data); - - OpParameter *parameter = reinterpret_cast(conv_param); - auto pKernel = std::make_unique(parameter, inputs, outputs); - if (pKernel.get() == nullptr) { - return; - } - pKernel->Init(); - - std::vector kernels{pKernel.release()}; - std::vector inputs_{&tensor_a}; - auto pGraph = std::make_unique(inputs_, outputs, kernels, kernels, kernels); - if (pGraph.get() == nullptr) { - return; - } - pGraph->Init(); - - // freamework to do!!! - inputs[0]->MallocData(allocator); - memcpy(inputs[0]->data_c(), input_data, sizeof(T2) * input_size); - - pGraph->Run(); - if (is_compare) { - T2 *output_data = reinterpret_cast(outputs[0]->data_c()); - - printf("==================input_data=================\n"); - std::cout << std::endl; - for (int i = 0; i < input_size; i++) { - std::cout << input_data[i] << ", "; - } - std::cout << std::endl; - printf("==================weight data=================\n"); - std::cout << std::endl; - for (int i = 0; i < pack_weight_size; i++) { - std::cout << packed_weight[i] << ", "; - } - std::cout << std::endl; - printf("==================output data=================\n"); - std::cout << std::endl; - for (int i = 0; i < output_size; i++) { - std::cout << output_data[i] << ", "; - } - std::cout << std::endl; - printf("==================expected output data=================\n"); - for (int i = 0; i < output_size; i++) { - std::cout << gnd_data[i] << ", "; - } - std::cout << std::endl; - // compare - CommonTest::CompareOutputData(output_data, gnd_data, output_size, err_max); - } - - inputs[1]->set_data(nullptr); - inputs[2]->set_data(nullptr); - inputs[0]->set_data(nullptr); - outputs[0]->set_data(nullptr); - return; +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/conv_parameter.h" + +namespace mindspore::lite::opencl::test { + +class TestOpenCL_DepthwiseConv2d : public CommonTest {}; + +namespace { +// PrimitiveType_DepthwiseConv2D: 
src/ops/populate/depthwise_conv2d_populate.cc +OpParameter *CreateParameter(int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_u, int pad_d, int pad_l, + int pad_r, int dilation_h, int dilation_w, ActType act_type, int input_channel) { + auto *param = test::CreateParameter(schema::PrimitiveType_DepthwiseConv2D); + param->kernel_h_ = kernel_h; + param->kernel_w_ = kernel_w; + param->stride_h_ = stride_h; + param->stride_w_ = stride_w; + param->pad_u_ = pad_u; + param->pad_d_ = pad_d; + param->pad_l_ = pad_l; + param->pad_r_ = pad_r; + param->input_channel_ = input_channel; + param->dilation_h_ = dilation_h; + param->dilation_w_ = dilation_w; + param->act_type_ = act_type; + return reinterpret_cast(param); } - -TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) { - auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - { - conv_param->input_batch_ = 1; - conv_param->input_h_ = 4; - conv_param->input_w_ = 4; - conv_param->input_channel_ = 4; - conv_param->output_batch_ = 1; - conv_param->output_h_ = 2; - conv_param->output_w_ = 2; - conv_param->output_channel_ = 4; - conv_param->kernel_h_ = 3; - conv_param->kernel_w_ = 3; - conv_param->stride_h_ = 1; - conv_param->stride_w_ = 1; - conv_param->dilation_h_ = 1; - conv_param->dilation_w_ = 1; - conv_param->pad_u_ = 0; - conv_param->pad_l_ = 0; - } - - // nhwc +} // namespace + +TEST_F(TestOpenCL_DepthwiseConv2d, NoPad) { + int kernel_h = 3; + int kernel_w = 3; + int stride_h = 1; + int stride_w = 1; + int pad_u = 0; + int pad_d = 0; + int pad_l = 0; + int pad_r = 0; + int dilation_h = 1; + int dilation_w = 1; + ActType act_type = ActType_No; + + std::vector input_shape = {1, 4, 4, 4}; + std::vector output_shape = {1, 2, 2, 4}; + std::vector weight_shape = {1, kernel_h, kernel_w, output_shape.back()}; + std::vector bias_shape = {output_shape.back()}; float input_data[] = {0.5488135, 0.0202184, 0.45615032, 0.31542835, 0.71518934, 0.83261985, 0.56843394, 0.36371076, 0.60276335, 0.77815676, 0.0187898, 
0.57019675, 0.5448832, 0.87001216, 0.6176355, 0.43860152, 0.4236548, 0.9786183, 0.6120957, 0.9883738, 0.6458941, 0.7991586, 0.616934, 0.10204481, @@ -169,396 +66,70 @@ TEST_F(TestConvolutionDwOpenCL, NoPadNC4HW4Fp32) { 0.79172504, 0.14335328, 0.6976312, 0.46631077, 0.5288949, 0.9446689, 0.06022547, 0.2444256, 0.56804454, 0.5218483, 0.6667667, 0.15896958, 0.92559665, 0.41466194, 0.67063785, 0.11037514, 0.07103606, 0.2645556, 0.21038257, 0.6563296, 0.0871293, 0.7742337, 0.12892629, 0.13818295}; - - // co h w ci + float bias_data[] = {0, 0, 0, 0}; float weight_data[] = {0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512, 0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, 0.31798318, 0.41426298, 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962, 0.31856894, 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.18319136, 0.5865129, 0.02010755, 0.82894003, 0.00469548}; + float output_data[] = {3.3848767, 1.4446403, 1.8428744, 1.3194335, 2.5873442, 2.1384869, 2.04022, 1.1872686, + 2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988}; - // pack correct data, nhwc - float gnd_data[] = {3.3848767, 1.4446403, 1.8428744, 1.3194335, 2.5873442, 2.1384869, 2.04022, 1.1872686, - 2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988}; - - DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4); -} - -TEST_F(TestConvolutionDwOpenCL, PadNC4HW4Fp32) { - auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - { - conv_param->input_batch_ = 1; - conv_param->input_h_ = 3; - conv_param->input_w_ = 3; - conv_param->input_channel_ = 5; - conv_param->output_batch_ = 1; - conv_param->output_h_ = 3; - conv_param->output_w_ = 3; - conv_param->output_channel_ = 5; - conv_param->kernel_h_ = 3; - conv_param->kernel_w_ = 3; - conv_param->stride_h_ = 1; - conv_param->stride_w_ = 1; - 
conv_param->dilation_h_ = 1; - conv_param->dilation_w_ = 1; - conv_param->pad_u_ = 1; - conv_param->pad_l_ = 1; - } - - // nhwc - float input_data[] = {0.5488135, 0.3834415, 0.77815676, 0.9446689, 0.6120957, 0.71518934, 0.79172504, 0.87001216, - 0.5218483, 0.616934, 0.60276335, 0.5288949, 0.9786183, 0.41466194, 0.94374806, 0.5448832, - 0.56804454, 0.7991586, 0.2645556, 0.6818203, 0.4236548, 0.92559665, 0.46147937, 0.7742337, - 0.3595079, 0.6458941, 0.07103606, 0.7805292, 0.45615032, 0.43703195, 0.4375872, 0.0871293, - 0.11827443, 0.56843394, 0.6976312, 0.891773, 0.0202184, 0.639921, 0.0187898, 0.06022547, - 0.96366274, 0.83261985, 0.14335328, 0.6176355, 0.6667667}; - // float input_data[]={ - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 }; - // co h w ci - float weight_data[] = {0.67063785, 0.21038257, 0.12892629, 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, - 0.10204481, 0.20887676, 0.16130951, 0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, - 0.11037514, 0.6563296, 0.13818295, 0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, - 0.09609841, 0.97645944, 0.4686512, 0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, - 0.12019656, 0.2961402, 0.11872772, 0.31798318, 0.41426298, 0.06414749, 0.6924721, 0.56660146, - 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962}; - // float weight_data[]={ - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 }; - // pack correct data, nhwc - float gnd_data[] = {1.189188, 1.0425153, 1.8012011, 0.6074867, 1.2120346, 1.5005531, 0.8346756, 2.4365785, - 0.54975945, 1.6815965, 1.2690231, 0.60214907, 1.6158017, 0.42115876, 
0.8854959, 1.1709145, - 1.0929465, 1.3534508, 1.1985044, 1.2932993, 2.4621446, 1.7086457, 2.6977584, 2.1960166, - 2.3769147, 2.3185873, 0.6133741, 0.9687358, 0.9987654, 1.0254729, 0.8368954, 0.74171704, - 0.8749627, 0.8953936, 0.5093431, 1.5496738, 0.54936385, 0.7683113, 1.165742, 1.3682933, - 1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203}; - - DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NC4HW4); -} - -TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp32) { - auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - { - conv_param->input_batch_ = 1; - conv_param->input_h_ = 4; - conv_param->input_w_ = 4; - conv_param->input_channel_ = 4; - conv_param->output_batch_ = 1; - conv_param->output_h_ = 2; - conv_param->output_w_ = 2; - conv_param->output_channel_ = 4; - conv_param->kernel_h_ = 3; - conv_param->kernel_w_ = 3; - conv_param->stride_h_ = 1; - conv_param->stride_w_ = 1; - conv_param->dilation_h_ = 1; - conv_param->dilation_w_ = 1; - conv_param->pad_u_ = 0; - conv_param->pad_l_ = 0; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(kernel_h, kernel_w, stride_h, stride_w, pad_u, pad_d, pad_l, pad_r, dilation_h, + dilation_w, act_type, input_shape.back()); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, weight_data, CONST_TENSOR}, + {bias_shape, bias_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable, fp16_enable ? 
1e-2 : 1e-5); } - - // nhwc - float input_data[] = {0.5488135, 0.0202184, 0.45615032, 0.31542835, 0.71518934, 0.83261985, 0.56843394, 0.36371076, - 0.60276335, 0.77815676, 0.0187898, 0.57019675, 0.5448832, 0.87001216, 0.6176355, 0.43860152, - 0.4236548, 0.9786183, 0.6120957, 0.9883738, 0.6458941, 0.7991586, 0.616934, 0.10204481, - 0.4375872, 0.46147937, 0.94374806, 0.20887676, 0.891773, 0.7805292, 0.6818203, 0.16130951, - 0.96366274, 0.11827443, 0.3595079, 0.6531083, 0.3834415, 0.639921, 0.43703195, 0.2532916, - 0.79172504, 0.14335328, 0.6976312, 0.46631077, 0.5288949, 0.9446689, 0.06022547, 0.2444256, - 0.56804454, 0.5218483, 0.6667667, 0.15896958, 0.92559665, 0.41466194, 0.67063785, 0.11037514, - 0.07103606, 0.2645556, 0.21038257, 0.6563296, 0.0871293, 0.7742337, 0.12892629, 0.13818295}; - - // co h w ci - float weight_data[] = {0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512, - 0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, - 0.31798318, 0.41426298, 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, - 0.5759465, 0.9292962, 0.31856894, 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.18319136, - 0.5865129, 0.02010755, 0.82894003, 0.00469548}; - - // pack correct data, nhwc - float gnd_data[] = {3.3848767, 1.4446403, 1.8428744, 1.3194335, 2.5873442, 2.1384869, 2.04022, 1.1872686, - 2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988}; - - DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NHWC4); - // delete conv_param; } -TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp32) { - auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - { - conv_param->input_batch_ = 1; - conv_param->input_h_ = 3; - conv_param->input_w_ = 3; - conv_param->input_channel_ = 5; - conv_param->output_batch_ = 1; - conv_param->output_h_ = 3; - conv_param->output_w_ = 3; - conv_param->output_channel_ = 5; - 
conv_param->kernel_h_ = 3; - conv_param->kernel_w_ = 3; - conv_param->stride_h_ = 1; - conv_param->stride_w_ = 1; - conv_param->dilation_h_ = 1; - conv_param->dilation_w_ = 1; - conv_param->pad_u_ = 1; - conv_param->pad_l_ = 1; - } - - // nhwc +TEST_F(TestOpenCL_DepthwiseConv2d, Pad) { + int kernel_h = 3; + int kernel_w = 3; + int stride_h = 1; + int stride_w = 1; + int pad_u = 1; + int pad_d = 1; + int pad_l = 1; + int pad_r = 1; + int dilation_h = 1; + int dilation_w = 1; + ActType act_type = ActType_No; + + std::vector input_shape = {1, 3, 3, 5}; + std::vector output_shape = {1, 3, 3, 5}; + std::vector weight_shape = {1, kernel_h, kernel_w, output_shape.back()}; + std::vector bias_shape = {output_shape.back()}; float input_data[] = {0.5488135, 0.3834415, 0.77815676, 0.9446689, 0.6120957, 0.71518934, 0.79172504, 0.87001216, 0.5218483, 0.616934, 0.60276335, 0.5288949, 0.9786183, 0.41466194, 0.94374806, 0.5448832, 0.56804454, 0.7991586, 0.2645556, 0.6818203, 0.4236548, 0.92559665, 0.46147937, 0.7742337, 0.3595079, 0.6458941, 0.07103606, 0.7805292, 0.45615032, 0.43703195, 0.4375872, 0.0871293, 0.11827443, 0.56843394, 0.6976312, 0.891773, 0.0202184, 0.639921, 0.0187898, 0.06022547, 0.96366274, 0.83261985, 0.14335328, 0.6176355, 0.6667667}; - // float input_data[]={ - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 }; - // co h w ci float weight_data[] = {0.67063785, 0.21038257, 0.12892629, 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481, 0.20887676, 0.16130951, 0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296, 0.13818295, 0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512, 0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, 0.31798318, 0.41426298, 0.06414749, 
0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962}; - // float weight_data[]={ - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 }; - // pack correct data, nhwc - float gnd_data[] = {1.189188, 1.0425153, 1.8012011, 0.6074867, 1.2120346, 1.5005531, 0.8346756, 2.4365785, - 0.54975945, 1.6815965, 1.2690231, 0.60214907, 1.6158017, 0.42115876, 0.8854959, 1.1709145, - 1.0929465, 1.3534508, 1.1985044, 1.2932993, 2.4621446, 1.7086457, 2.6977584, 2.1960166, - 2.3769147, 2.3185873, 0.6133741, 0.9687358, 0.9987654, 1.0254729, 0.8368954, 0.74171704, - 0.8749627, 0.8953936, 0.5093431, 1.5496738, 0.54936385, 0.7683113, 1.165742, 1.3682933, - 1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203}; - - DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NHWC4); -} - -TEST_F(TestConvolutionDwOpenCL, NoPadNHWC4Fp16) { - auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - { - conv_param->input_batch_ = 1; - conv_param->input_h_ = 4; - conv_param->input_w_ = 4; - conv_param->input_channel_ = 4; - conv_param->output_batch_ = 1; - conv_param->output_h_ = 2; - conv_param->output_w_ = 2; - conv_param->output_channel_ = 4; - conv_param->kernel_h_ = 3; - conv_param->kernel_w_ = 3; - conv_param->stride_h_ = 1; - conv_param->stride_w_ = 1; - conv_param->dilation_h_ = 1; - conv_param->dilation_w_ = 1; - conv_param->pad_u_ = 0; - conv_param->pad_l_ = 0; - } - - // nhwc - float16_t input_data[] = { - 0.5488135, 0.0202184, 0.45615032, 0.31542835, 0.71518934, 0.83261985, 0.56843394, 0.36371076, - 0.60276335, 0.77815676, 0.0187898, 0.57019675, 0.5448832, 0.87001216, 0.6176355, 0.43860152, - 0.4236548, 0.9786183, 0.6120957, 0.9883738, 0.6458941, 0.7991586, 0.616934, 0.10204481, - 0.4375872, 0.46147937, 0.94374806, 0.20887676, 
0.891773, 0.7805292, 0.6818203, 0.16130951, - 0.96366274, 0.11827443, 0.3595079, 0.6531083, 0.3834415, 0.639921, 0.43703195, 0.2532916, - 0.79172504, 0.14335328, 0.6976312, 0.46631077, 0.5288949, 0.9446689, 0.06022547, 0.2444256, - 0.56804454, 0.5218483, 0.6667667, 0.15896958, 0.92559665, 0.41466194, 0.67063785, 0.11037514, - 0.07103606, 0.2645556, 0.21038257, 0.6563296, 0.0871293, 0.7742337, 0.12892629, 0.13818295}; - - // co h w ci - float16_t weight_data[] = { - 0.19658236, 0.36872518, 0.82099324, 0.09710128, 0.8379449, 0.09609841, 0.97645944, 0.4686512, 0.9767611, - 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, 0.31798318, 0.41426298, - 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962, 0.31856894, - 0.6674104, 0.13179787, 0.7163272, 0.2894061, 0.18319136, 0.5865129, 0.02010755, 0.82894003, 0.00469548}; - - // pack correct data, nhwc - float16_t gnd_data[] = {3.3848767, 1.4446403, 1.8428744, 1.3194335, 2.5873442, 2.1384869, 2.04022, 1.1872686, - 2.2294958, 1.6570128, 2.465089, 1.4294086, 2.7941442, 1.7871612, 2.188921, 1.0601988}; - - DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NHWC4, - kNumberTypeFloat16, true, 1e-2); -} - -TEST_F(TestConvolutionDwOpenCL, PadNHWC4Fp16) { - auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - { - conv_param->input_batch_ = 1; - conv_param->input_h_ = 3; - conv_param->input_w_ = 3; - conv_param->input_channel_ = 5; - conv_param->output_batch_ = 1; - conv_param->output_h_ = 3; - conv_param->output_w_ = 3; - conv_param->output_channel_ = 5; - conv_param->kernel_h_ = 3; - conv_param->kernel_w_ = 3; - conv_param->stride_h_ = 1; - conv_param->stride_w_ = 1; - conv_param->dilation_h_ = 1; - conv_param->dilation_w_ = 1; - conv_param->pad_u_ = 1; - conv_param->pad_l_ = 1; + float bias_data[] = {0, 0, 0, 0, 0}; + float output_data[] = {1.189188, 1.0425153, 1.8012011, 0.6074867, 1.2120346, 1.5005531, 0.8346756, 
2.4365785, + 0.54975945, 1.6815965, 1.2690231, 0.60214907, 1.6158017, 0.42115876, 0.8854959, 1.1709145, + 1.0929465, 1.3534508, 1.1985044, 1.2932993, 2.4621446, 1.7086457, 2.6977584, 2.1960166, + 2.3769147, 2.3185873, 0.6133741, 0.9687358, 0.9987654, 1.0254729, 0.8368954, 0.74171704, + 0.8749627, 0.8953936, 0.5093431, 1.5496738, 0.54936385, 0.7683113, 1.165742, 1.3682933, + 1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(kernel_h, kernel_w, stride_h, stride_w, pad_u, pad_d, pad_l, pad_r, dilation_h, + dilation_w, act_type, input_shape.back()); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, weight_data, CONST_TENSOR}, + {bias_shape, bias_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable, fp16_enable ? 1e-2 : 1e-5); } - - // nhwc - float16_t input_data[] = { - 0.5488135, 0.3834415, 0.77815676, 0.9446689, 0.6120957, 0.71518934, 0.79172504, 0.87001216, 0.5218483, - 0.616934, 0.60276335, 0.5288949, 0.9786183, 0.41466194, 0.94374806, 0.5448832, 0.56804454, 0.7991586, - 0.2645556, 0.6818203, 0.4236548, 0.92559665, 0.46147937, 0.7742337, 0.3595079, 0.6458941, 0.07103606, - 0.7805292, 0.45615032, 0.43703195, 0.4375872, 0.0871293, 0.11827443, 0.56843394, 0.6976312, 0.891773, - 0.0202184, 0.639921, 0.0187898, 0.06022547, 0.96366274, 0.83261985, 0.14335328, 0.6176355, 0.6667667}; - // float16_t input_data[]={ - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 , - // 1 , 1 , 1 , 1 , 1 }; - // co h w ci - float16_t weight_data[] = { - 0.67063785, 0.21038257, 0.12892629, 0.31542835, 0.36371076, 0.57019675, 0.43860152, 0.9883738, 0.10204481, - 0.20887676, 0.16130951, 0.6531083, 0.2532916, 0.46631077, 0.2444256, 0.15896958, 0.11037514, 0.6563296, - 0.13818295, 0.19658236, 0.36872518, 0.82099324, 0.09710128, 
0.8379449, 0.09609841, 0.97645944, 0.4686512, - 0.9767611, 0.6048455, 0.7392636, 0.03918779, 0.28280696, 0.12019656, 0.2961402, 0.11872772, 0.31798318, - 0.41426298, 0.06414749, 0.6924721, 0.56660146, 0.2653895, 0.5232481, 0.09394051, 0.5759465, 0.9292962}; - // float16_t weight_data[]={ - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 , - // 1 , 1 , 1 }; - // pack correct data, nhwc - float16_t gnd_data[] = {1.189188, 1.0425153, 1.8012011, 0.6074867, 1.2120346, 1.5005531, 0.8346756, 2.4365785, - 0.54975945, 1.6815965, 1.2690231, 0.60214907, 1.6158017, 0.42115876, 0.8854959, 1.1709145, - 1.0929465, 1.3534508, 1.1985044, 1.2932993, 2.4621446, 1.7086457, 2.6977584, 2.1960166, - 2.3769147, 2.3185873, 0.6133741, 0.9687358, 0.9987654, 1.0254729, 0.8368954, 0.74171704, - 0.8749627, 0.8953936, 0.5093431, 1.5496738, 0.54936385, 0.7683113, 1.165742, 1.3682933, - 1.0517888, 0.59817517, 0.75649744, 1.2075498, 0.38804203}; - - DepthWiseTestMain(conv_param, input_data, weight_data, gnd_data, schema::Format_NHWC4, - kNumberTypeFloat16, true, 1e-2); } -TEST_F(TestConvolutionDwOpenCL, ProfilingMobilenetv2Fp32) { - std::vector> src_shape{ - {1, 32, 112, 112}, {1, 96, 112, 112}, {1, 144, 56, 56}, {1, 144, 56, 56}, {1, 192, 28, 28}, - {1, 192, 28, 28}, {1, 384, 14, 14}, {1, 576, 14, 14}, {1, 576, 14, 14}, {1, 960, 7, 7}, - }; - std::vector> dst_shape{ - {1, 32, 112, 112}, {1, 96, 56, 56}, {1, 144, 56, 56}, {1, 144, 28, 28}, {1, 192, 28, 28}, - {1, 192, 14, 14}, {1, 384, 14, 14}, {1, 576, 14, 14}, {1, 576, 7, 7}, {1, 960, 7, 7}, - }; - std::vector> filter_shape{ - {32, 1, 1, 1}, {96, 3, 3, 1}, {144, 1, 1, 1}, {144, 3, 3, 1}, {192, 1, 1, 1}, - {192, 3, 3, 1}, {384, 1, 1, 1}, {576, 1, 1, 1}, {576, 3, 3, 1}, {960, 1, 1, 1}, - }; - - // nhwc - const size_t in_size = 96 * 112 * 112; - float *input_data = 
new (std::nothrow) float[in_size]; - if (input_data == nullptr) { - return; - } - memset(input_data, 0, in_size * sizeof(float_t)); - for (auto i = 0; i < in_size; ++i) { - input_data[i] = 1; - } - // co h w ci - const size_t wt_size = 576 * 3 * 3; - float *weight_data = new (std::nothrow) float[wt_size]; - if (weight_data == nullptr) { - delete[] input_data; - return; - } - memset(weight_data, 0, wt_size); - for (auto i = 0; i < wt_size; ++i) { - weight_data[i] = 1; - } - for (size_t i = 0; i < src_shape.size(); ++i) { - const int MAX_RUN_TIMES = 1; - for (int j = 0; j < MAX_RUN_TIMES; ++j) { - printf("========profiling depthwise, in shape(%d,%d,%d,%d), out shape(%d,%d,%d,%d), iter%d========\n", - src_shape[i][0], src_shape[i][1], src_shape[i][2], src_shape[i][3], dst_shape[i][0], dst_shape[i][1], - dst_shape[i][2], dst_shape[i][3], j); - auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - { - conv_param->input_batch_ = 1; - conv_param->input_h_ = src_shape[i][2]; - conv_param->input_w_ = src_shape[i][3]; - conv_param->input_channel_ = src_shape[i][1]; - conv_param->output_batch_ = 1; - conv_param->output_h_ = dst_shape[i][2]; - conv_param->output_w_ = dst_shape[i][3]; - conv_param->output_channel_ = dst_shape[i][1]; - conv_param->kernel_h_ = filter_shape[i][1]; - conv_param->kernel_w_ = filter_shape[i][2]; - conv_param->stride_h_ = conv_param->output_h_ / conv_param->input_h_; - conv_param->stride_w_ = conv_param->output_w_ / conv_param->input_w_; - conv_param->pad_u_ = (conv_param->kernel_h_ - 1) / 2; - conv_param->pad_l_ = (conv_param->kernel_w_ - 1) / 2; - conv_param->dilation_h_ = 1; - conv_param->dilation_w_ = 1; - } - DepthWiseTestMain(conv_param, input_data, weight_data, nullptr, schema::Format_NHWC4, - kNumberTypeFloat32, false); - } - } - delete[] input_data; - delete[] weight_data; -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc 
b/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc index c2ce6719b6..e9c7cf4c99 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/fill_tests.cc @@ -24,7 +24,10 @@ using mindspore::lite::Tensor; using mindspore::schema::PrimitiveType_Fill; using mindspore::schema::PrimitiveType_Shape; using mindspore::schema::Format::Format_NHWC; -namespace mindspore { + +// PrimitiveType_Fill: src/ops/populate/fill_populate.cc + +namespace mindspore::lite::opencl::test { class TestFillOpenCLCI : public mindspore::CommonTest { public: TestFillOpenCLCI() {} @@ -142,4 +145,4 @@ TEST_F(TestFillOpenCLCI, Fp32testshape) { ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); delete sub_graph; } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/fullconnection_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/fullconnection_tests.cc index d6b3b39a56..f48a853e14 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/fullconnection_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/fullconnection_tests.cc @@ -13,183 +13,78 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/matmul_parameter.h" -namespace mindspore { -class TestFullConnectionOpenCL : public mindspore::CommonTest { - public: - TestFullConnectionOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseFullConnection(const std::vector &shape, void *input_data, void *weight_data, void *bias_data, - void *output_data, bool enable_fp16, int dims) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - std::vector input_shape, output_shape, weight_shape, bias_shape; - if (dims == 2) { - int ci = shape[0]; - int co = shape[1]; - input_shape = {1, ci}; - output_shape = {1, co}; - weight_shape = {co, ci}; - bias_shape = {co}; - } else if (dims == 4) { - int n = shape[0]; - int h = shape[1]; - int w = shape[2]; - int ci = shape[3]; - int co = shape[4]; - input_shape = {n, h, w, ci}; - output_shape = {n, co}; - weight_shape = {co, h * w * ci}; - bias_shape = {co}; - } - auto param = static_cast(malloc(sizeof(MatMulParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } +class TestOpenCL_FullConnection : public CommonTest {}; + +namespace { +// PrimitiveType_FullConnection: src/ops/populate/full_connection_populate.cc +OpParameter *CreateParameter(std::vector *input_shape, std::vector *weight_shape, + std::vector 
*bias_shape, std::vector *output_shape, int ndim, int ci, int co, + int n = 1, int h = 1, int w = 1) { + auto *param = test::CreateParameter(schema::PrimitiveType_FullConnection); param->a_transpose_ = false; param->b_transpose_ = true; param->has_bias_ = true; param->act_type_ = ActType_No; - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - input_shape, dims == 2 ? schema::Format_NC : schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - - auto tensor_w_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - weight_shape, schema::Format_NC); - auto tensor_w = tensor_w_ptr.get(); - if (tensor_w == nullptr) { - MS_LOG(ERROR) << "tensor_w create error."; - return; - } - tensor_w->set_data(weight_data); - - auto tensor_bias_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - bias_shape, schema::Format_NC); - auto tensor_bias = tensor_bias_ptr.get(); - if (tensor_bias == nullptr) { - MS_LOG(ERROR) << "tensor_w create error."; - return; - } - tensor_bias->set_data(bias_data); - - auto tensor_out_ptr = std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), - output_shape, schema::Format_NC); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x, tensor_w, tensor_bias}; - std::vector outputs{tensor_out}; - auto op_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (op_kernel == nullptr) { - MS_LOG(ERROR) << "op_kernel create error."; - return; - } - inputs[0]->MallocData(allocator); - - std::vector kernels{op_kernel}; - - std::vector inputs_g{tensor_x}; - auto pGraph_ptr = std::make_unique(inputs_g, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, tensor_x->ElementsNum() * dtype_size); - pGraph->Run(); - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-5)); - } - for (auto t : inputs) { - t->set_data(nullptr); + if (ndim == 2) { + *input_shape = {1, ci}; + *output_shape = {1, co}; + *weight_shape = {co, ci}; + *bias_shape = {co}; + } else if (ndim == 4) { + *input_shape = {n, h, w, ci}; + *output_shape = {n, co}; + *weight_shape = {co, h * w * ci}; + *bias_shape = {co}; } - for (auto t : outputs) { - t->set_data(nullptr); - } - MS_LOG(INFO) << "TestFullConnection passed"; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestFullConnectionOpenCL, FullConnection2DFp32) { +TEST_F(TestOpenCL_FullConnection, 2D) { + int ndim = 2; int ci = 5; int co = 3; - std::vector shape = {ci, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - std::vector weight_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 
1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector bias_data = {1.0f, 1.0f, 1.0f}; - std::vector output_data = {11.f, 11.f, 11.f}; - RunTestCaseFullConnection(shape, input_data.data(), weight_data.data(), bias_data.data(), output_data.data(), false, - 2); -} + float input_data[] = {0, 1, 2, 3, 4}; + float weight_data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float bias_data[] = {1, 1, 1}; + float output_data[] = {11, 11, 11}; -TEST_F(TestFullConnectionOpenCL, FullConnection2DFp16) { - int ci = 5; - int co = 3; - std::vector shape = {ci, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - std::vector weight_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector bias_data = {1.0f, 1.0f, 1.0f}; - std::vector output_data = {11.f, 11.f, 11.f}; - RunTestCaseFullConnection(shape, input_data.data(), weight_data.data(), bias_data.data(), output_data.data(), true, - 2); + for (auto fp16_enable : {false, true}) { + std::vector input_shape, weight_shape, bias_shape, output_shape; + auto *param = CreateParameter(&input_shape, &weight_shape, &bias_shape, &output_shape, ndim, ci, co); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, weight_data, CONST_TENSOR}, + {bias_shape, bias_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestFullConnectionOpenCL, FullConnection4DFp32) { - int n = 1; - int h = 2; - int w = 1; - int c = 4; +TEST_F(TestOpenCL_FullConnection, 4D) { + int ndim = 4; + int ci = 4; int co = 2; - std::vector shape = {n, h, w, c, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - std::vector weight_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector bias_data = {1.0f, 1.0f}; - std::vector output_data = {29.f, 29.f}; - RunTestCaseFullConnection(shape, input_data.data(), weight_data.data(), bias_data.data(), output_data.data(), false, - 4); 
-} - -TEST_F(TestFullConnectionOpenCL, FullConnection4DFp16) { int n = 1; int h = 2; int w = 1; - int c = 4; - int co = 2; - std::vector shape = {n, h, w, c, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; - std::vector weight_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector bias_data = {1.0f, 1.0f}; - std::vector output_data = {29.f, 29.f}; - RunTestCaseFullConnection(shape, input_data.data(), weight_data.data(), bias_data.data(), output_data.data(), true, - 4); + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7}; + float weight_data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float bias_data[] = {1, 1}; + float output_data[] = {29, 29}; + + for (auto fp16_enable : {false, true}) { + std::vector input_shape, weight_shape, bias_shape, output_shape; + auto *param = CreateParameter(&input_shape, &weight_shape, &bias_shape, &output_shape, ndim, ci, co, n, h, w); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, weight_data, CONST_TENSOR}, + {bias_shape, bias_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); + } } -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc index cacfec82e1..4db30fa920 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/gather_tests.cc @@ -13,177 +13,108 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/gather_parameter.h" -namespace mindspore { -class TestGatherOpenCL : public mindspore::CommonTest { - public: - TestGatherOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -template -void test_main_gather(void *input_data, void *correct_data, const std::vector &input_shape, - const std::vector &indices, GatherParameter *param, TypeId data_type, - schema::Format format) { - MS_LOG(INFO) << " begin test "; - auto ocl_wrp = lite::opencl::OpenCLRuntimeWrapper(); - auto ocl_runtime = ocl_wrp.GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); +class TestOpenCL_Gather : public CommonTest {}; - std::vector indices_shape = {static_cast(indices.size())}; - std::vector output_shape = input_shape; - output_shape[param->axis_] = indices.size(); - - auto tensor_a = lite::Tensor(TypeId(data_type), input_shape, format); - auto tensor_b = lite::Tensor(kNumberTypeInt32, indices_shape, schema::Format_NC); - auto tensor_c = lite::Tensor(TypeId(data_type), output_shape, format); - std::vector inputs{&tensor_a, &tensor_b}; - std::vector outputs{&tensor_c}; - size_t input_size = tensor_a.Size(); - - auto *pkernel = - new (std::nothrow) kernel::GatherOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (pkernel == nullptr) { - MS_LOG(INFO) << "new GatherOpenCLKernel failed "; - return; - } - pkernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize 
sub_graph "; - std::vector kernels{pkernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&tensor_a}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - delete pkernel; - MS_LOG(INFO) << " new SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, input_size); - auto input1_tensor = reinterpret_cast(inputs[1]->data_c()); - for (int i = 0; i < inputs[1]->ElementsNum(); ++i) { - input1_tensor[i] = indices.at(i); - } - sub_graph->Run(); - - std::cout << "==================output data================" << std::endl; - auto *output_data = reinterpret_cast(outputs[0]->data_c()); - for (size_t i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << output_data[i] << " "; - } - std::cout << std::endl; - std::cout << "==================expected data================" << std::endl; - for (size_t i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << static_cast(correct_data)[i] << " "; - } - std::cout << std::endl; - CommonTest::CompareOutputData(output_data, static_cast(correct_data), outputs[0]->ElementsNum(), 0.0001); -} -TEST_F(TestGatherOpenCL, Axis0Fp16) { - std::vector input_shape{5, 10, 10, 5}; - std::vector indices{1, 0, 3, 4}; - GatherParameter *param = std::make_unique().release(); - param->axis_ = 0; - size_t input_size, output_size; - std::string inputPpath = "./test_data/gatherfp16_input.bin"; - std::string correctOutputPath = "./test_data/gatherfp16_output.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(inputPpath.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - if (param == nullptr) { - return; - } - TypeId data_type = kNumberTypeFloat16; - schema::Format format = schema::Format_NHWC; - test_main_gather(input_data, correct_data, input_shape, indices, param, data_type, format); +namespace { +// PrimitiveType_Gather: 
src/ops/populate/gather_populate.cc +OpParameter *CreateParameter(int axis) { + auto *param = test::CreateParameter(schema::PrimitiveType_Gather); + param->axis_ = axis; + return reinterpret_cast(param); } +} // namespace + +TEST_F(TestOpenCL_Gather, Axis0) { + int axis = 0; + std::vector input_shape = {10}; + std::vector indices_shape = {2}; + std::vector output_shape = {2}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + int32_t indices[] = {1, 3}; + float output_data[] = {1, 3}; -TEST_F(TestGatherOpenCL, Axis0Fp32) { - std::vector input_shape{5, 10, 10, 5}; - std::vector indices{1, 2, 3, 4}; - GatherParameter *param = std::make_unique().release(); - param->axis_ = 0; - size_t input_size, output_size; - std::string inputPpath = "./test_data/gatherfp32_input.bin"; - std::string correctOutputPath = "./test_data/gatherfp32_output.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(inputPpath.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - if (param == nullptr) { - return; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain( + {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}}, + {output_shape, output_data}, param, fp16_enable); } - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_gather(input_data, correct_data, input_shape, indices, param, data_type, format); } -TEST_F(TestGatherOpenCL, Axis1Fp32) { - std::vector input_shape{1, 5, 4, 4}; - std::vector indices{1, 3}; - GatherParameter *param = reinterpret_cast(malloc(sizeof(GatherParameter))); - param->axis_ = 1; +TEST_F(TestOpenCL_Gather, Axis1) { + int axis = 1; + std::vector input_shape = {1, 5, 4, 4}; + std::vector indices_shape = {2}; + std::vector output_shape = {1, 2, 4, 4}; float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; - float correct_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; - if (param == nullptr) { - return; + float output_data[] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + + int32_t indices_int32[] = {1, 3}; + int64_t indices_int64[] = {1, 3}; + float32_t indices_fp32[] = {1, 3}; + float16_t indices_fp16[] = {1, 3}; + TypeId data_types[] = {kNumberTypeInt32, kNumberTypeInt64, kNumberTypeFloat32, kNumberTypeFloat16}; + void *indices_datas[] = {indices_int32, indices_int64, indices_fp32, indices_fp16}; + + for (int i = 0; i < 1; ++i) { + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {indices_shape, indices_datas[i], CONST_TENSOR, data_types[i]}}, + {output_shape, output_data}, param, fp16_enable); + } } - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_gather(input_data, correct_data, input_shape, indices, param, data_type, format); } -TEST_F(TestGatherOpenCL, Axis2Fp32) { - std::vector input_shape{1, 5, 4, 4}; - std::vector indices{1, 3}; - GatherParameter *param = std::make_unique().release(); - param->axis_ = 2; +TEST_F(TestOpenCL_Gather, Axis2) { + int axis = 2; + std::vector input_shape = {1, 5, 4, 4}; + std::vector indices_shape = {2}; + std::vector output_shape = {1, 5, 2, 4}; float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 
50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; - float correct_data[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39, - 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; - if (param == nullptr) { - return; + int32_t indices[] = {1, 3}; + float output_data[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31, 36, 37, 38, 39, + 44, 45, 46, 47, 52, 53, 54, 55, 60, 61, 62, 63, 68, 69, 70, 71, 76, 77, 78, 79}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain( + {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}}, + {output_shape, output_data}, param, fp16_enable); } - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_gather(input_data, correct_data, input_shape, indices, param, data_type, format); } -TEST_F(TestGatherOpenCL, Axis3Fp32) { - std::vector input_shape{1, 5, 4, 4}; - std::vector indices{1, 3}; - GatherParameter *param = std::make_unique().release(); - param->axis_ = 3; +TEST_F(TestOpenCL_Gather, Axis3) { + int axis = 3; + std::vector input_shape = {1, 5, 4, 4}; + std::vector indices_shape = {2}; + std::vector output_shape = {1, 5, 4, 2}; float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79}; - float correct_data[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, - 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; - if (param == nullptr) { - return; + int32_t indices[] = {1, 3}; + float output_data[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 
19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, + 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain( + {{input_shape, input_data, VAR, kNumberTypeFloat32}, {indices_shape, indices, CONST_TENSOR, kNumberTypeInt32}}, + {output_shape, output_data}, param, fp16_enable); } - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_gather(input_data, correct_data, input_shape, indices, param, data_type, format); } -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/hswish_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/hswish_tests.cc deleted file mode 100644 index 830bc36804..0000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/hswish_tests.cc +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/hswish.h" -using mindspore::lite::Tensor; -using mindspore::schema::Format::Format_NHWC; -namespace mindspore { -class TestSwishOpenCLCI : public mindspore::CommonTest { - public: - TestSwishOpenCLCI() {} -}; - -TEST_F(TestSwishOpenCLCI, Fp32CI) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); - - MS_LOG(INFO) << " init tensors "; - std::vector input_shape = {2, 10, 1, 4}; - std::vector output_shape = {2, 10, 1, 4}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - float input_data[] = {2.5f, 6.0f, -7.4f, -3.5f, 5.9f, 6.5f, -8.0f, 7.4f, 5.9f, 6.5f, -8.0f, 7.4f, 7.5f, 6.0f, - -7.4f, -3.5f, 7.5f, 6.0f, -7.4f, -3.5f, 5.9f, 6.5f, -8.0f, 7.4f, 5.9f, 6.5f, -8.0f, 7.4f, - 7.5f, 6.0f, -7.4f, -3.5f, 7.5f, 6.0f, -7.4f, -3.5f, 5.9f, 6.5f, -8.0f, 7.4f, 5.9f, 6.5f, - -8.0f, 7.4f, 7.5f, 6.0f, -7.4f, -3.5f, 7.5f, 6.0f, -7.4f, -3.5f, 5.9f, 6.5f, -8.0f, 7.4f, - 5.9f, 6.5f, -8.0f, 7.4f, 7.5f, 6.0f, -7.4f, -3.5f, 7.5f, 6.0f, -7.4f, -3.5f, 5.9f, 6.5f, - -8.0f, 7.4f, 5.9f, 6.5f, -8.0f, 7.4f, 7.5f, 6.0f, -7.4f, -3.5f}; - - float correctOutput[] = {0.9167f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, - 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 1.0f, - 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, - 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f, - 1.0f, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, - 0.0f, 1.0f, 1.0f, 1.0f, 
0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f}; - auto output_tensor = Tensor(data_type, input_shape, Format_NHWC, tensor_type); - auto in_tensor = Tensor(data_type, output_shape, Format_NHWC, tensor_type); - std::vector inputs{&in_tensor}; - std::vector outputs{&output_tensor}; - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(ActivationParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; - } - - auto *hswish_kernel = - new (std::nothrow) kernel::HswishOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (hswish_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::HswishOpenCLKernel failed "; - delete param; - return; - } - hswish_kernel->Init(); - // to do allocate memory for inputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{hswish_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - delete param; - delete hswish_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data, sizeof(input_data)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; -} -} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc index 61c11ae2a2..c13bcdb210 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/matmul_tests.cc @@ -13,169 +13,61 @@ 
* See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/matmul_parameter.h" -namespace mindspore { -class TestMatMulOpenCL : public mindspore::CommonTest { - public: - TestMatMulOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseMatMul(const std::vector &shape, void *input_data, void *weight_data, void *output_data, - bool enable_fp16, int dims) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - std::vector input_shape, output_shape, weight_shape; - if (dims == 2) { - int ci = shape[0]; - int co = shape[1]; - input_shape = {1, ci}; - output_shape = {1, co}; - weight_shape = {co, ci}; - } else if (dims == 4) { - int a = shape[0]; - int b = shape[1]; - int m = shape[2]; - int ci = shape[3]; - int co = shape[4]; - input_shape = {a, b, m, ci}; - output_shape = {a, b, m, co}; - weight_shape = {a, b, co, ci}; - } - auto param = static_cast(malloc(sizeof(MatMulParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - param->a_transpose_ = false; - param->b_transpose_ = true; - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - input_shape, dims == 2 ? 
schema::Format_NC : schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } +class TestOpenCL_MatMul : public CommonTest {}; - auto tensor_w_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - weight_shape, dims == 2 ? schema::Format_NC : schema::Format_NHWC); - auto tensor_w = tensor_w_ptr.get(); - if (tensor_w == nullptr) { - MS_LOG(ERROR) << "tensor_w create error."; - return; - } - tensor_w->set_data(weight_data); - - auto tensor_out_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), output_shape, - dims == 2 ? schema::Format_NC : schema::Format_NHWC); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x, tensor_w}; - std::vector outputs{tensor_out}; - auto op_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (op_kernel == nullptr) { - MS_LOG(ERROR) << "op_kernel create error."; - return; - } - inputs[0]->MallocData(allocator); - - std::vector kernels{op_kernel}; - - std::vector inputs_g{tensor_x}; - auto pGraph_ptr = std::make_unique(inputs_g, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, tensor_x->ElementsNum() * dtype_size); - pGraph->Run(); - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-5)); - } - - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } - MS_LOG(INFO) << "TestMatMul passed"; 
+namespace { +// PrimitiveType_MatMul: src/ops/populate/matmul_populate.cc +OpParameter *CreateParameter(bool a_transpose = false, bool b_transpose = true) { + auto *param = test::CreateParameter(schema::PrimitiveType_MatMul); + param->a_transpose_ = a_transpose; + param->b_transpose_ = b_transpose; + param->has_bias_ = false; + param->act_type_ = ActType_No; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestMatMulOpenCL, MatMul2DFp32) { +TEST_F(TestOpenCL_MatMul, 2D) { int ci = 5; int co = 3; - std::vector shape = {ci, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - std::vector weight_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector output_data = {10.f, 10.f, 10.f}; - RunTestCaseMatMul(shape, input_data.data(), weight_data.data(), output_data.data(), false, 2); -} + std::vector input_shape = {1, ci}; + std::vector output_shape = {1, co}; + std::vector weight_shape = {co, ci}; + float input_data[] = {0, 1, 2, 3, 4}; + float weight_data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float output_data[] = {10, 10, 10}; -TEST_F(TestMatMulOpenCL, MatMul2DFp16) { - int ci = 5; - int co = 3; - std::vector shape = {ci, co}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f}; - std::vector weight_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector output_data = {10.f, 10.f, 10.f}; - RunTestCaseMatMul(shape, input_data.data(), weight_data.data(), output_data.data(), true, 2); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(); + TestMain({{input_shape, input_data, VAR}, {weight_shape, weight_data, CONST_TENSOR}}, {output_shape, output_data}, + param, fp16_enable); + } } -TEST_F(TestMatMulOpenCL, MatMul4DFp32) { +TEST_F(TestOpenCL_MatMul, 4D) { int a = 1; int b = 2; - int c = 2; + int m = 2; int ci = 5; int co = 3; - std::vector shape = {a, b, c, ci, co}; - std::vector 
input_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector weight_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f}; - std::vector output_data = {15.0f, 40.0f, 65.0f, 15.0f, 40.0f, 65.0f, - 90.0f, 115.0f, 140.0f, 90.0f, 115.0f, 140.0f}; - RunTestCaseMatMul(shape, input_data.data(), weight_data.data(), output_data.data(), false, 4); -} + std::vector input_shape = {a, b, m, ci}; + std::vector output_shape = {a, b, m, co}; + std::vector weight_shape = {a, b, co, ci}; + float input_data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float weight_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}; + float output_data[] = {15, 40, 65, 15, 40, 65, 90, 115, 140, 90, 115, 140}; -TEST_F(TestMatMulOpenCL, MatMul4DFp16) { - int a = 1; - int b = 2; - int c = 2; - int ci = 5; - int co = 3; - std::vector shape = {a, b, c, ci, co}; - std::vector input_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector weight_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, - 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, - 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f}; - std::vector output_data = {15.0f, 40.0f, 65.0f, 15.0f, 40.0f, 65.0f, - 90.0f, 115.0f, 140.0f, 90.0f, 115.0f, 140.0f}; - RunTestCaseMatMul(shape, input_data.data(), weight_data.data(), output_data.data(), true, 4); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(); + TestMain({{input_shape, input_data, VAR}, {weight_shape, weight_data, CONST_TENSOR}}, {output_shape, output_data}, + param, fp16_enable); + } 
} -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc index 150cde5620..ed0fc24c1a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/one_hot_tests.cc @@ -13,522 +13,592 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" - -namespace mindspore { -class TestOneHotOpenCL : public mindspore::CommonTest { - public: - TestOneHotOpenCL() {} -}; - -void RunTestCaseOneHot(const std::vector &shape_in, const std::vector &shape_out, void *input_data, - void *output_data, int axis, int depth, float on_value, float off_value) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - auto param = static_cast(malloc(sizeof(OneHotParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - param->axis_ = axis; - auto tensor_x_ptr = std::make_unique(kNumberTypeFloat32, shape_in, schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - std::vector weight_shape = {}; - auto tensor_depth_ptr = std::make_unique(kNumberTypeInt32, weight_shape, schema::Format_NHWC); - auto tensor_depth = tensor_depth_ptr.get(); - if (tensor_depth == nullptr) { - 
MS_LOG(ERROR) << "tensor_depth create error."; - return; - } - tensor_depth->set_data(&depth); - auto tensor_on_value_ptr = std::make_unique(kNumberTypeFloat32, weight_shape, schema::Format_NHWC); - auto tensor_on_value = tensor_on_value_ptr.get(); - if (tensor_on_value == nullptr) { - MS_LOG(ERROR) << "tensor_on_value create error."; - return; - } - tensor_on_value->set_data(&on_value); - auto tensor_off_value_ptr = std::make_unique(kNumberTypeFloat32, weight_shape, schema::Format_NHWC); - auto tensor_off_value = tensor_off_value_ptr.get(); - if (tensor_off_value == nullptr) { - MS_LOG(ERROR) << "tensor_off_value create error."; - return; - } - tensor_off_value->set_data(&off_value); - auto tensor_out_ptr = std::make_unique(kNumberTypeFloat32, shape_out); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x, tensor_depth, tensor_on_value, tensor_off_value}; - std::vector outputs{tensor_out}; - auto arith_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/fp32/one_hot_fp32.h" - inputs[0]->MallocData(allocator); - - std::vector kernels{arith_kernel}; - std::vector inputs_g{tensor_x}; - auto pGraph_ptr = std::make_unique(inputs_g, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * sizeof(int)); - pGraph->Run(); +namespace mindspore::lite::opencl::test { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - 
t->set_data(nullptr); - } +class TestOpenCL_OneHot : public CommonTest {}; - MS_LOG(INFO) << "Test OneHot passed"; +namespace { +// PrimitiveType_OneHot: src/ops/populate/one_hot_populate.cc +OpParameter *CreateParameter(int axis) { + auto *param = test::CreateParameter(schema::PrimitiveType_OneHot); + param->axis_ = axis; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestOneHotOpenCL, OneHot4DAxis3Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis3Fp32) { int depth = 4; int axis = -1; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {1, 2, 2}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {3, 4, -1, 2}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + + std::vector input_shape = {1, 2, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {3, 4, -1, 2}; + float output_data[] = {-1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}; + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis3T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T2Fp32) { int depth = 5; int axis = -1; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {1, 2, 2}; - std::vector shape_out = shape_in; - 
shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {-1, 3, 4, 5}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {1, 2, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {-1, 3, 4, 5}; + float output_data[] = {-1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis3T3Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T3Fp32) { int depth = 9; int axis = -1; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {1, 2, 3}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {4, 9, 8, 9, 1, 8}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f}; - - 
RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {1, 2, 3}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {4, 9, 8, 9, 1, 8}; + float output_data[] = {-1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis3T4Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis3T4Fp32) { int depth = 6; int axis = -1; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {1, 2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {2, 4, 0, 6, 1, 6, 2, 2, 4, 5}; - std::vector output_data = {-1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, - 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float 
off_value = -1; + std::vector input_shape = {1, 2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {2, 4, 0, 6, 1, 6, 2, 2, 4, 5}; + float output_data[] = {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, + -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, 1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis2Fp32) { int depth = 5; int axis = 2; - float on_value = 2.f; - float off_value = 0.f; - std::vector shape_in = {1, 2, 2}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {2, 3, 0, 3}; - std::vector output_data = {0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 0.0f, 0.0f, 2.0f, 0.0f, 0.0f, - 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 0.0f, 0.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = 0; + std::vector input_shape = {1, 2, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {2, 3, 0, 3}; + float output_data[] = {0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + 
TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis2T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T2Fp32) { int depth = 5; int axis = 2; - float on_value = 2.f; - float off_value = 0.f; - std::vector shape_in = {1, 6, 2}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {1, 1, 1, 0, 1, 1, 4, -1, 4, 4, -1, 1}; - std::vector output_data = {0.0f, 0.0f, 2.0f, 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, - 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 2.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 2.0f, 2.0f, 0.0f, 0.0f, 0.0f, 2.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = 0; + std::vector input_shape = {1, 6, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {1, 1, 1, 0, 1, 1, 4, -1, 4, 4, -1, 1}; + float output_data[] = {0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, 
kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis2T3Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T3Fp32) { int depth = 1; int axis = 2; - float on_value = 2.f; - float off_value = 0.f; - std::vector shape_in = {1, 2, 2}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {-1, 1, -1, 0}; - std::vector output_data = {0.0f, 0.0f, 0.0f, 2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = 0; + std::vector input_shape = {1, 2, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {-1, 1, -1, 0}; + float output_data[] = {0, 0, 0, 2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis2T4Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis2T4Fp32) { int depth = 5; int axis = 2; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {1, 2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {4, 0, -1, 2, 5, 4, -1, 4, 4, 4}; - std::vector output_data = {-1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, 
-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {1, 2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {4, 0, -1, 2, 5, 4, -1, 4, 4, 4}; + float output_data[] = {-1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, + -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis1T1Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T1Fp32) { int depth = 1; int axis = 1; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {1, 6, 6}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {0, -1, 1, 0, -1, -1, 0, 0, -1, 1, 0, -1, -1, 1, 1, -1, 1, 1, - -1, 1, 1, 1, -1, 0, 0, -1, 0, 0, 1, 1, 1, 1, 0, 0, 0, -1}; - std::vector output_data = {2.0f, -2.0f, -2.0f, 2.0f, -2.0f, -2.0f, 2.0f, 2.0f, -2.0f, -2.0f, 2.0f, -2.0f, - -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, - 2.0f, -2.0f, 2.0f, 2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, 2.0f, 2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + 
float on_value = 2; + float off_value = -2; + std::vector input_shape = {1, 6, 6}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {0, -1, 1, 0, -1, -1, 0, 0, -1, 1, 0, -1, -1, 1, 1, -1, 1, 1, + -1, 1, 1, 1, -1, 0, 0, -1, 0, 0, 1, 1, 1, 1, 0, 0, 0, -1}; + float output_data[] = {2, -2, -2, 2, -2, -2, 2, 2, -2, -2, 2, -2, -2, -2, -2, -2, -2, -2, + -2, -2, -2, -2, -2, 2, 2, -2, 2, 2, -2, -2, -2, -2, 2, 2, 2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis1T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T2Fp32) { int depth = 4; int axis = 1; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {1, 2, 2}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {-1, 1, 1, 2}; - std::vector output_data = {-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, 2.0f, -2.0f, - -2.0f, -2.0f, -2.0f, 2.0f, -2.0f, -2.0f, -2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {1, 2, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {-1, 1, 1, 2}; + float output_data[] = {-2, -2, -2, -2, -2, 2, 2, -2, -2, -2, -2, 2, -2, -2, -2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + 
TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis1T3Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis1T3Fp32) { int depth = 5; int axis = 1; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {1, 2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {3, 5, 2, 0, 2, 2, -1, 0, 4, 3}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, 1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {1, 2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {3, 5, 2, 0, 2, 2, -1, 0, 4, 3}; + float output_data[] = {-1, -1, -1, 1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 1, -1, 1, 1, -1, -1, -1, -1, 1, -1, -1, -1, + -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + 
{output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis0Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis0Fp32) { int depth = 5; int axis = 0; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {1, 2, 2}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {4, 0, 3, 3}; - std::vector output_data = {-2.0f, 2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, - -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, 2.0f, 2.0f, -2.0f, -2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {1, 2, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {4, 0, 3, 3}; + float output_data[] = {-2, 2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, -2, -2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis0T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis0T2Fp32) { int depth = 5; int axis = 0; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {1, 2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {2, 4, 4, 3, 5, 0, 3, 3, -1, 2}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, 
-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, - -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, - -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {1, 2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {2, 4, 4, 3, 5, 0, 3, 3, -1, 2}; + float output_data[] = {-1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, 1, + -1, -1, 1, 1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot4DAxis0T3Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot4DAxis0T3Fp32) { int depth = 5; int axis = 0; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {2, 2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {0, 3, 2, 0, 0, 3, 4, 1, 5, 1, 4, -1, 3, 3, 1, 1, 4, 2, 2, 4}; - std::vector output_data = { - 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, 1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 
-1.0f, -1.0f, 1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, - -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, - 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, 1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {2, 2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {0, 3, 2, 0, 0, 3, 4, 1, 5, 1, 4, -1, 3, 3, 1, 1, 4, 2, 2, 4}; + float output_data[] = {1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, + -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, + -1, 1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 1, -1, -1, 1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot3DAxis0Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot3DAxis0Fp32) { int depth = 5; int axis = 0; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {2, 3}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data 
= {4, 4, 3, 2, -1, 5}; - std::vector output_data = {-2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, - -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, -2.0f, -2.0f, -2.0f, -2.0f, - 2.0f, -2.0f, -2.0f, -2.0f, 2.0f, 2.0f, -2.0f, -2.0f, -2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {2, 3}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {4, 4, 3, 2, -1, 5}; + float output_data[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + 2, -2, -2, -2, -2, 2, -2, -2, -2, 2, 2, -2, -2, -2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot3DAxis0T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot3DAxis0T2Fp32) { int depth = 5; int axis = 0; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {4, 2, 2, 3, -1, 5, 2, 4, 5, -1}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, 
input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {4, 2, 2, 3, -1, 5, 2, 4, 5, -1}; + float output_data[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 1, 1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, 1, + -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, 1, -1, -1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot3DAxis1Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot3DAxis1Fp32) { int depth = 5; int axis = 1; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {2, 3}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {0, 0, 0, 0, 4, -1}; - std::vector output_data = {2.0f, 2.0f, 2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, - -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, -2.0f, -2.0f, -2.0f, -2.0f, - -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {2, 3}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {0, 0, 0, 0, 4, -1}; + float output_data[] = 
{2, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + 2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot3DAxis1T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot3DAxis1T2Fp32) { int depth = 5; int axis = 1; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {1, -1, 3, 2, 5, 5, 4, 5, 0, -1}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {1, -1, 3, 2, 5, 5, 4, 5, 0, -1}; + float output_data[] = {-1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, + 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + 
TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot3DAxis2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot3DAxis2Fp32) { int depth = 4; int axis = 2; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {2, 2}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {0, 3, 4, 2}; - std::vector output_data = {2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, - -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, -2.0f, 2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {2, 2}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {0, 3, 4, 2}; + float output_data[] = {2, -2, -2, -2, -2, -2, -2, 2, -2, -2, -2, -2, -2, -2, 2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot3DAxis2T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot3DAxis2T2Fp32) { int depth = 5; int axis = 2; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {2, 5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector 
input_data = {0, -1, 2, -1, 5, 4, 2, -1, 4, -1}; - std::vector output_data = {1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, - -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {2, 5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {0, -1, 2, -1, 5, 4, 2, -1, 4, -1}; + float output_data[] = {1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, 1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot2DAxis0Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot2DAxis0Fp32) { int depth = 3; int axis = 0; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {3}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {2, 1, 3}; - std::vector output_data = {-2.0f, -2.0f, -2.0f, -2.0f, 2.0f, -2.0f, 2.0f, -2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, 
off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {3}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {2, 1, 3}; + float output_data[] = {-2, -2, -2, -2, 2, -2, 2, -2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot2DAxis0T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot2DAxis0T2Fp32) { int depth = 5; int axis = 0; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {2, 2, 0, 0, 4}; - std::vector output_data = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, 1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {2, 2, 0, 0, 4}; + float output_data[] = {-1, -1, 1, 1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, 
&on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot2DAxis1Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot2DAxis1Fp32) { int depth = 3; int axis = -1; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {3}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {1, 2, 0}; - std::vector output_data = {-2.0f, 2.0f, -2.0f, -2.0f, -2.0f, 2.0f, 2.0f, -2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {3}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {1, 2, 0}; + float output_data[] = {-2, 2, -2, -2, -2, 2, 2, -2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot2DAxis1T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot2DAxis1T2Fp32) { int depth = 5; int axis = -1; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {5}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {5, 4, 0, 4, -1}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, - -1.0f, 1.0f, -1.0f, 
-1.0f, -1.0f, -1.0f, -1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {5}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {5, 4, 0, 4, -1}; + float output_data[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, + -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot1DAxis0Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot1DAxis0Fp32) { int depth = 3; int axis = -1; - float on_value = 2.f; - float off_value = -2.f; - std::vector shape_in = {}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {1}; - std::vector output_data = {-2.0f, 2.0f, -2.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 2; + float off_value = -2; + std::vector input_shape = {}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {1}; + float output_data[] = {-2, 2, -2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, 
CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestOneHotOpenCL, OneHot1DAxis0T2Fp32) { +TEST_F(TestOpenCL_OneHot, OneHot1DAxis0T2Fp32) { int depth = 5; int axis = 0; - float on_value = 1.f; - float off_value = -1.f; - std::vector shape_in = {}; - std::vector shape_out = shape_in; - shape_out.insert(shape_out.begin() + (axis + shape_in.size() + 1) % (shape_in.size() + 1), depth); - std::vector input_data = {4}; - std::vector output_data = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f}; - - RunTestCaseOneHot(shape_in, shape_out, input_data.data(), output_data.data(), axis, depth, on_value, off_value); + float on_value = 1; + float off_value = -1; + std::vector input_shape = {}; + std::vector output_shape = input_shape; + output_shape.insert(output_shape.begin() + (axis + input_shape.size() + 1) % (input_shape.size() + 1), depth); + int input_data[] = {4}; + float output_data[] = {-1, -1, -1, -1, 1}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}, + {{}, &depth, CONST_SCALAR, kNumberTypeInt32}, + {{}, &on_value, CONST_SCALAR, kNumberTypeFloat32}, + {{}, &off_value, CONST_SCALAR, kNumberTypeFloat32}}, + {output_shape, output_data}, param, fp16_enable); + } } -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/opencl_kernel_tests.h b/mindspore/lite/test/ut/src/runtime/kernel/opencl/opencl_kernel_tests.h deleted file mode 100644 index eb36b5c9c8..0000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/opencl_kernel_tests.h +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include "common/common_test.h" -#include "src/common/log_adapter.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" - -#ifndef TESTS_UT_OPENCL_KERNLE_TESTS_H -#define TESTS_UT_OPENCL_KERNLE_TESTS_H - -namespace mindspore { - -class TestOpenCLKernel : public mindspore::CommonTest { - public: - TestOpenCLKernel() {} -}; - -} // namespace mindspore -#endif // TESTS_UT_OPENCL_KERNLE_TESTS_H diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc index cae15c12f3..f22ba50b70 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/pad_tests.cc @@ -13,155 +13,221 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h" -#include "nnacl/pack.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/pad_parameter.h" -using mindspore::kernel::LiteKernel; -using mindspore::kernel::PadOpenCLKernel; -using mindspore::kernel::SubGraphOpenCLKernel; -using mindspore::lite::Tensor; -using mindspore::schema::Format; -using mindspore::schema::Format_NC4HW4; -using mindspore::schema::Format_NHWC; -using mindspore::schema::Format_NHWC4; -using mindspore::schema::NodeType_ValueNode; -using mindspore::schema::PaddingMode; -using mindspore::schema::PaddingMode_CONSTANT; -using mindspore::schema::PaddingMode_REFLECT; -using mindspore::schema::PaddingMode_SYMMETRIC; +namespace mindspore::lite::opencl::test { -namespace mindspore { +class TestOpenCL_Pad : public CommonTest {}; -class TestPadOpenCL : public mindspore::CommonTest {}; - -void TEST_MAIN(PadParameter *param, Format input_format, Format output_format, Format op_format, const TypeId data_type, - const std::vector &input_shape, const std::vector &output_shape, const float *input_data, - const float *expect_data) { - auto ocl_runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto ocl_runtime = ocl_runtime_wrapper.GetInstance(); - ocl_runtime->Init(); - ocl_runtime->SetFp16Enable(data_type == kNumberTypeFloat16); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(DEBUG) << "create Tensors"; - auto input = Tensor(kNumberTypeFloat32, input_shape, input_format, lite::Tensor::CONST_TENSOR); - auto output = Tensor(kNumberTypeFloat32, output_shape, output_format, lite::Tensor::CONST_TENSOR); - - MS_LOG(DEBUG) << "create OpenCL Kernel"; - std::vector inputs{&input}; - std::vector 
outputs{&output}; - auto kernel = std::make_unique(reinterpret_cast(param), inputs, outputs); - if (kernel == nullptr) { - return; +namespace { +// PrimitiveType_Pad: src/ops/populate/pad_populate.cc +OpParameter *CreateParameter(const std::vector &paddings, float constant_value) { + auto *param = test::CreateParameter(schema::PrimitiveType_Pad); + param->pad_mode_ = schema::PaddingMode_CONSTANT; + param->constant_value_ = constant_value; + param->padding_length = MAX_PAD_SIZE; + int size = paddings.size(); + for (size_t i = 0; i < MAX_PAD_SIZE - size; ++i) { + param->paddings_[i] = 0; } - kernel->Init(); - - MS_LOG(DEBUG) << "create SubGraph"; - std::vector kernels{kernel.release()}; - auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel({&input}, {&output}, kernels, kernels, kernels); - input.MallocData(allocator); - sub_graph->Init(); - memcpy(input.data_c(), input_data, input.Size()); - sub_graph->Run(); - if (CommonTest::CompareOutputData(reinterpret_cast(output.data_c()), const_cast(expect_data), - static_cast(output.ElementsNum()))) { - FAIL(); - } else { - std::cout << "COMPARE SUCCESS!\n"; + for (size_t i = 0; i < size; i++) { + param->paddings_[MAX_PAD_SIZE - size + i] = paddings[i]; } - - MS_LOG(DEBUG) << "release resources"; - input.set_data(nullptr); - output.set_data(nullptr); - delete sub_graph; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestPadOpenCL, TestPad3) { - auto param = static_cast(malloc(sizeof(PadParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "PadParameter create error."; - return; +TEST_F(TestOpenCL_Pad, 1D) { + float input_data[] = {1, 1, 1, 1}; + float output_data[] = {2, 2, 2, 1, 1, 1, 1, 2, 2}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({3, 2}, 2); + TestMain({{{4}, input_data, VAR}}, {{9}, output_data}, param, fp16_enable); } - param->pad_mode_ = PaddingMode_CONSTANT; - param->constant_value_ = 0.0f; - param->padding_length = MAX_PAD_SIZE; - int paddings[MAX_PAD_SIZE] = 
{0, 0, 3, 3, 3, 3, 0, 0}; - memcpy(param->paddings_, paddings, sizeof(paddings)); - - float input_data[48] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, - 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, - 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, - 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0}; - float expect_data[300] = { - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 12.0, 13.0, 14.0, 15.0, - 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 36.0, - 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; +} - 
TEST_MAIN(param, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat32, {1, 4, 4, 3}, {1, 10, 10, 3}, input_data, - expect_data); - TEST_MAIN(param, Format_NHWC, Format_NHWC, Format_NC4HW4, kNumberTypeFloat32, {1, 4, 4, 3}, {1, 10, 10, 3}, - input_data, expect_data); - TEST_MAIN(param, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat16, {1, 4, 4, 3}, {1, 10, 10, 3}, input_data, - expect_data); - TEST_MAIN(param, Format_NHWC, Format_NHWC, Format_NC4HW4, kNumberTypeFloat16, {1, 4, 4, 3}, {1, 10, 10, 3}, - input_data, expect_data); +TEST_F(TestOpenCL_Pad, 2D) { + float input_data[] = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2}; + float output_data[] = {10, 10, 10, 10, 10, 10, 10, 10, 10, 1, 1, 1, 1, 1, 10, 10, + 10, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({1, 1, 1, 2}, 10); + TestMain({{{2, 5}, input_data, VAR}}, {{4, 8}, output_data}, param, fp16_enable); + } } -TEST_F(TestPadOpenCL, TestPad4) { - auto param = static_cast(malloc(sizeof(PadParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "PadParameter create error."; - return; +TEST_F(TestOpenCL_Pad, 4D) { + float input_data[48] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47}; + float output_data[300] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, 0, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({0, 0, 3, 3, 3, 3, 0, 0}, 0); + TestMain({{{1, 4, 4, 3}, input_data, VAR}}, {{1, 10, 10, 3}, output_data}, param, fp16_enable); } - param->pad_mode_ = PaddingMode_CONSTANT; - param->constant_value_ = 1.0f; - param->padding_length = MAX_PAD_SIZE; - int paddings[MAX_PAD_SIZE] = {0, 0, 3, 3, 3, 3, 0, 0}; - memcpy(param->paddings_, paddings, sizeof(paddings)); - float input_data[48] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, - 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, - 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, - 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0}; - float expect_data[300] = { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 12.0, 13.0, 14.0, 15.0, - 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 24.0, 25.0, 26.0, 27.0, 
28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 36.0, - 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + float output_data1[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({0, 0, 3, 3, 3, 3, 0, 0}, 1); + TestMain({{{1, 4, 4, 3}, input_data, VAR}}, {{1, 10, 10, 3}, output_data1}, param, fp16_enable); + } +} + +TEST_F(TestOpenCL_Pad, test0) { + std::vector, std::vector, std::vector, 
std::vector, + std::vector, float>> + cases = { + {"SimpleConstTest", + {1, 2, 2, 1}, + {3, 2, 4, 1}, + {1, 2, 3, 4}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 0, 1, 1, 0, 0}, + 0}, + {"SimpleConstImageStyleTest", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 0}, + {"SimpleConst1DTest", {2}, {5}, {2, 3}, {0, 2, 3, 0, 0}, {1, 2}, 0}, + {"SimpleDynamicTest", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 0}, + {"AdvancedConstTest", + {1, 2, 3, 1}, + {2, 4, 6, 1}, + {1, 2, 3, 4, 5, 6}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 3, 0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 0, 2, 0, 3, 0, 0}, + 0}, + {"AdvancedConstImageStyleTest", + {1, 2, 3, 1}, + {1, 4, 7, 1}, + {1, 2, 3, 4, 5, 6}, + {0, 1, 2, 3, 0, 0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 1, 3, 0, 0}, + 0}, + {"AdvancedDynamicTest", + {1, 2, 3, 1}, + {1, 4, 7, 1}, + {1, 2, 3, 4, 5, 6}, + {0, 1, 2, 3, 0, 0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 1, 3, 0, 0}, + 0}, + {"SimpleConstTestUint8", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 0}, + {"SimpleConstTestInt8", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 0}, + {"SimpleConstFloat32ValuedTestUint8", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4, 5, 5, 5, 5, 5}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 5}, + {"SimpleConstFloat32ValuedTestInt8", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4, 5, 5, 5, 5, 5}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 5}, + 
{"Simple4DConstFloat32ValuedTest", + {1, 1, 2, 1}, + {2, 1, 2, 2}, + {3, 3}, + {3, 5, 3, 5, 5, 5, 5, 5}, + {0, 1, 0, 0, 0, 0, 0, 1}, + 5}, + {"SimpleConstInt32ValuedTest", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4, 5, 5, 5, 5, 5}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 5}, + {"SimpleDynamicTest", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 0}, + {"SimpleDynamicValuedTest", + {1, 2, 2, 1}, + {1, 4, 4, 1}, + {1, 2, 3, 4}, + {5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4, 5, 5, 5, 5, 5}, + {0, 0, 1, 1, 1, 1, 0, 0}, + 5}, + {"AdvancedConstTest", + {1, 2, 3, 1}, + {1, 4, 7, 1}, + {1, 2, 3, 4, 5, 6}, + {0, 1, 2, 3, 0, 0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 1, 3, 0, 0}, + 0}, + {"AdvancedDynamicTest", + {1, 2, 3, 1}, + {1, 4, 7, 1}, + {1, 2, 3, 4, 5, 6}, + {0, 1, 2, 3, 0, 0, 0, 0, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 1, 3, 0, 0}, + 0}, + }; - TEST_MAIN(param, Format_NHWC, Format_NHWC, Format_NHWC4, kNumberTypeFloat32, {1, 4, 4, 3}, {1, 10, 10, 3}, input_data, - expect_data); + for (auto &case_ : cases) { + auto &name = std::get<0>(case_); + auto &input_shape = std::get<1>(case_); + auto &output_shape = std::get<2>(case_); + auto input_data = std::get<3>(case_).data(); + auto output_data = std::get<4>(case_).data(); + auto &paddings = std::get<5>(case_); + auto constant_value = std::get<6>(case_); + std::cout << name << std::endl; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(paddings, constant_value); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } + } } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/pooling_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/pooling_tests.cc index 593a970559..9fd3991f6f 100644 --- 
a/mindspore/lite/test/ut/src/runtime/kernel/opencl/pooling_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/pooling_tests.cc @@ -13,175 +13,56 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" - -namespace mindspore { - -class TestPoolingOpenCL : public mindspore::CommonTest {}; - -void InitPoolingParam(PoolingParameter *param) { - param->input_batch_ = 1; - param->input_h_ = 2; - param->input_w_ = 2; - param->input_channel_ = 4; - - param->output_batch_ = 1; - param->output_h_ = 1; - param->output_w_ = 1; - param->output_channel_ = 4; - - param->window_h_ = 2; - param->window_w_ = 2; - - param->stride_h_ = 2; - param->stride_w_ = 2; - - param->pad_u_ = 0; - param->pad_d_ = 0; - param->pad_l_ = 0; - param->pad_r_ = 0; -} - -void RunTestCasePooling(const std::vector &shape, void *input_data, void *output_data, bool enable_fp16, - PoolMode pool_mode) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? 
sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - int n = shape[0]; - int h = shape[1]; - int w = shape[2]; - int c = shape[3]; - int oh = shape[4]; - int ow = shape[5]; - auto param = static_cast(malloc(sizeof(PoolingParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param create error."; - return; - } - InitPoolingParam(param); +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/pooling_parameter.h" + +namespace mindspore::lite::opencl::test { + +class TestOpenCL_Pooling : public CommonTest {}; + +namespace { +// PrimitiveType_Pooling: src/ops/populate/pooling_populate.cc +OpParameter *CreateParameter(PoolMode pool_mode, int window_h, int window_w, int stride_h, int stride_w, int pad_u, + int pad_d, int pad_l, int pad_r, RoundMode round_mode = RoundMode_No, + ActType act_type = ActType_No) { + auto *param = test::CreateParameter(schema::PrimitiveType_Pooling); + param->global_ = false; + param->window_w_ = window_w; + param->window_h_ = window_h; + param->pad_u_ = pad_u; + param->pad_d_ = pad_d; + param->pad_l_ = pad_l; + param->pad_r_ = pad_r; + param->stride_w_ = stride_w; + param->stride_h_ = stride_h; + param->avg_mode_ = 0; param->pool_mode_ = pool_mode; - std::vector input_shape = {n, h, w, c}; - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - input_shape, schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - std::vector out_shape = {n, oh, ow, c}; - auto tensor_out_ptr = std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), - out_shape, schema::Format_NHWC); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto arith_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } - - inputs[0]->MallocData(allocator); - - std::vector kernels{arith_kernel}; - auto pGraph_ptr = std::make_unique(inputs, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); - pGraph->Run(); - - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); - } - - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } - - MS_LOG(INFO) << "Test AvgPool2d passed"; -} - -TEST_F(TestPoolingOpenCL, AvgPoolingFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 4; - int oh = 1; - int ow = 1; - std::vector shape = {n, h, w, c, oh, ow}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}; - std::vector output_data = {6.0f, 7.0f, 8.0f, 9.0f}; - - RunTestCasePooling(shape, input_data.data(), output_data.data(), false, PoolMode_AvgPool); + param->round_mode_ = round_mode; + param->act_type_ = act_type; + return reinterpret_cast(param); } - -TEST_F(TestPoolingOpenCL, AvgPoolingFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 4; - int oh = 1; - int ow = 
1; - std::vector shape = {n, h, w, c, oh, ow}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}; - std::vector output_data = {6.0f, 7.0f, 8.0f, 9.0f}; - - RunTestCasePooling(shape, input_data.data(), output_data.data(), true, PoolMode_AvgPool); +} // namespace + +TEST_F(TestOpenCL_Pooling, Avg) { + std::vector input_shape = {1, 2, 2, 4}; + std::vector output_shape = {1, 1, 1, 4}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + float output_data[] = {6, 7, 8, 9}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(PoolMode_AvgPool, 2, 2, 2, 2, 0, 0, 0, 0); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestPoolingOpenCL, MaxPoolingFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 4; - int oh = 1; - int ow = 1; - std::vector shape = {n, h, w, c, oh, ow}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}; - std::vector output_data = {12.0f, 13.0f, 14.0f, 15.0f}; - - RunTestCasePooling(shape, input_data.data(), output_data.data(), false, PoolMode_MaxPool); +TEST_F(TestOpenCL_Pooling, Max) { + std::vector input_shape = {1, 2, 2, 4}; + std::vector output_shape = {1, 1, 1, 4}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + float output_data[] = {12, 13, 14, 15}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(PoolMode_MaxPool, 2, 2, 2, 2, 0, 0, 0, 0); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestPoolingOpenCL, MaxPoolingFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 4; - int oh = 1; - int ow = 1; - std::vector shape = {n, h, w, c, oh, ow}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 
13.0f, 14.0f, 15.0f}; - std::vector output_data = {12.0f, 13.0f, 14.0f, 15.0f}; - - RunTestCasePooling(shape, input_data.data(), output_data.data(), true, PoolMode_MaxPool); -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/power_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/power_tests.cc index 2308c0ebc8..55652d1fea 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/power_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/power_tests.cc @@ -21,10 +21,12 @@ #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" #include "mindspore/lite/src/runtime/kernel/opencl/kernel/power.h" +// PrimitiveType_Power: src/ops/populate/power_populate.cc + using mindspore::lite::Tensor; using mindspore::schema::Format::Format_NHWC; -namespace mindspore { -class TestPowerOpenCLCI : public mindspore::CommonTest { +namespace mindspore::lite::opencl::test { +class TestPowerOpenCLCI : public CommonTest { public: TestPowerOpenCLCI() {} }; @@ -166,4 +168,4 @@ TEST_F(TestPowerOpenCLCI, broadcast) { TEST_MAIN(input_data1, input_data1, expect_data, data_type, shape_a, shape_b, output_shape, true); } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc index 9977a5e4ef..96707e4fa5 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/prelu_tests.cc @@ -30,8 +30,10 @@ using mindspore::kernel::SubGraphOpenCLKernel; using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; -namespace mindspore { -class TestPReluOpenCL : public mindspore::CommonTest {}; +// PrimitiveType_PReLU: src/ops/populate/p_relu_populate.cc + +namespace mindspore::lite::opencl::test { +class TestPReluOpenCL : public CommonTest {}; void 
LoadDataPRelu(void *dst, size_t dst_size, const std::string &file_path) { if (file_path.empty()) { @@ -193,4 +195,4 @@ TEST_F(TestPReluOpenCL, PReluFp32_dim4) { delete param; delete sub_graph; } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc index 1eb894e9a8..05d10ca76f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reduce_tests.cc @@ -13,701 +13,80 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" - -namespace mindspore { -class TestReduceOpenCL : public mindspore::CommonTest { - public: - TestReduceOpenCL() {} -}; - -void RunTestCaseReduce(const std::vector &shape, void *input_data, void *output_data, bool enable_fp16, - int reduce_mode, bool WC = false) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? 
sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - auto param = static_cast(malloc(sizeof(ReduceParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - param->axes_[0] = 1; - param->axes_[1] = 2; - if (WC) { - param->axes_[0] = 2; - param->axes_[1] = 3; - param->keep_dims_ = true; - } - param->num_axes_ = 2; - param->mode_ = reduce_mode; - int n = shape[0]; - int h = shape[1]; - int w = shape[2]; - int c = shape[3]; - std::vector input_shape = {n, h, w, c}; - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - input_shape, schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - std::vector out_shape = {n, c}; - if (WC) { - out_shape = {n, h, 1, 1}; - } - auto tensor_out_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - out_shape, WC ? 
schema::Format_NHWC : schema::Format_NC); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto arith_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } - - inputs[0]->MallocData(allocator); - - std::vector kernels{arith_kernel}; - auto pGraph_ptr = std::make_unique(inputs, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); - pGraph->Run(); - - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3)); - } - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/reduce_parameter.h" + +namespace mindspore::lite::opencl::test { + +class TestOpenCL_Reduce : public CommonTest {}; + +namespace { +// PrimitiveType_Reduce: src/ops/populate/reduce_populate.cc +// PrimitiveType_Mean: src/ops/populate/mean_populate.cc +OpParameter *CreateParameter(const std::vector &axis, schema::ReduceMode mode, bool keep_dims) { + auto *param = test::CreateParameter(schema::PrimitiveType_Reduce); + param->keep_dims_ = keep_dims; + param->reduce_to_end_ = false; + param->coeff = 0.f; + param->num_axes_ = axis.size(); + param->mode_ = mode; + for (int i = 0; i < axis.size(); ++i) { + param->axes_[i] = axis[i]; } - - MS_LOG(INFO) << "Test Reduce passed"; -} - 
-TEST_F(TestReduceOpenCL, ReduceMeanFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {4.5f, 5.5f, 6.5f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMean); -} - -TEST_F(TestReduceOpenCL, ReduceMeanFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {4.5f, 5.5f, 6.5f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceMean); -} - -TEST_F(TestReduceOpenCL, ReduceMeanLocalFp32) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 3.0f, 1.0f, 6.0f, 8.0f, 6.0f, 4.0f, 1.0f, 3.0f, 5.0f, 4.0f, 5.0f, 4.0f, 0.0f, 2.0f, 4.0f, 1.0f, 3.0f, - 1.0f, 6.0f, 5.0f, 4.0f, 7.0f, 0.0f, 7.0f, 1.0f, 2.0f, 5.0f, 0.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.0f, 8.0f, - 5.0f, 7.0f, 6.0f, 2.0f, 5.0f, 3.0f, 2.0f, 9.0f, 1.0f, 0.0f, 2.0f, 0.0f, 6.0f, 0.0f, 3.0f, 6.0f, 0.0f, - 7.0f, 1.0f, 0.0f, 6.0f, 3.0f, 0.0f, 1.0f, 0.0f, 5.0f, 3.0f, 8.0f, 1.0f, 9.0f, 2.0f, 2.0f, 2.0f, 7.0f, - 7.0f, 6.0f, 7.0f, 0.0f, 5.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f, 0.0f, 8.0f, 4.0f, 9.0f, 1.0f, 2.0f, 9.0f, - 9.0f, 6.0f, 0.0f, 8.0f, 5.0f, 2.0f, 9.0f, 3.0f, 1.0f, 9.0f, 0.0f, 4.0f, 6.0f, 0.0f, 5.0f, 2.0f, 3.0f}; - std::vector output_data = {3.971f, 4.559f, 3.294f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMean); -} - -TEST_F(TestReduceOpenCL, ReduceMeanLocalFp16) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 3.0f, 1.0f, 6.0f, 8.0f, 6.0f, 4.0f, 1.0f, 3.0f, 5.0f, 4.0f, 5.0f, 4.0f, 0.0f, 2.0f, 
4.0f, 1.0f, 3.0f, - 1.0f, 6.0f, 5.0f, 4.0f, 7.0f, 0.0f, 7.0f, 1.0f, 2.0f, 5.0f, 0.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.0f, 8.0f, - 5.0f, 7.0f, 6.0f, 2.0f, 5.0f, 3.0f, 2.0f, 9.0f, 1.0f, 0.0f, 2.0f, 0.0f, 6.0f, 0.0f, 3.0f, 6.0f, 0.0f, - 7.0f, 1.0f, 0.0f, 6.0f, 3.0f, 0.0f, 1.0f, 0.0f, 5.0f, 3.0f, 8.0f, 1.0f, 9.0f, 2.0f, 2.0f, 2.0f, 7.0f, - 7.0f, 6.0f, 7.0f, 0.0f, 5.0f, 4.0f, 2.0f, 6.0f, 8.0f, 2.0f, 0.0f, 8.0f, 4.0f, 9.0f, 1.0f, 2.0f, 9.0f, - 9.0f, 6.0f, 0.0f, 8.0f, 5.0f, 2.0f, 9.0f, 3.0f, 1.0f, 9.0f, 0.0f, 4.0f, 6.0f, 0.0f, 5.0f, 2.0f, 3.0f}; - std::vector output_data = {3.971f, 4.559f, 3.294f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceMean); -} - -TEST_F(TestReduceOpenCL, ReduceMeanWCFp32) { - int n = 1; - int h = 3; - int w = 2; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {1.5f, 5.5f, 9.5f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMean, true); -} - -TEST_F(TestReduceOpenCL, ReduceMeanWCLocalFp32) { - int n = 1; - int h = 5; - int w = 17; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 6.0f, 3.0f, 6.0f, 1.0f, 4.0f, 2.0f, 5.0f, 1.0f, 7.0f, 5.0f, 7.0f, 3.0f, 0.0f, 2.0f, 9.0f, 8.0f, 3.0f, 1.0f, 6.0f, - 8.0f, 6.0f, 6.0f, 3.0f, 0.0f, 6.0f, 3.0f, 8.0f, 0.0f, 6.0f, 1.0f, 0.0f, 9.0f, 4.0f, 4.0f, 9.0f, 4.0f, 9.0f, 5.0f, - 0.0f, 1.0f, 4.0f, 6.0f, 4.0f, 0.0f, 9.0f, 3.0f, 6.0f, 6.0f, 7.0f, 1.0f, 7.0f, 8.0f, 6.0f, 0.0f, 2.0f, 6.0f, 4.0f, - 4.0f, 3.0f, 7.0f, 7.0f, 5.0f, 2.0f, 3.0f, 4.0f, 3.0f, 1.0f, 5.0f, 4.0f, 8.0f, 7.0f, 5.0f, 0.0f, 7.0f, 5.0f, 5.0f, - 0.0f, 3.0f, 4.0f, 0.0f, 6.0f, 5.0f, 4.0f, 6.0f, 2.0f, 0.0f, 8.0f, 6.0f, 4.0f, 6.0f, 3.0f, 2.0f, 6.0f, 4.0f, 8.0f, - 4.0f, 8.0f, 2.0f, 0.0f, 0.0f, 9.0f, 4.0f, 3.0f, 4.0f, 1.0f, 7.0f, 9.0f, 1.0f, 9.0f, 4.0f, 2.0f, 8.0f, 3.0f, 5.0f, - 8.0f, 7.0f, 8.0f, 8.0f, 
4.0f, 8.0f, 2.0f, 8.0f, 9.0f, 4.0f, 5.0f, 0.0f, 2.0f, 1.0f, 0.0f, 8.0f, 4.0f, 7.0f, 2.0f, - 4.0f, 5.0f, 0.0f, 0.0f, 7.0f, 2.0f, 0.0f, 2.0f, 7.0f, 1.0f, 1.0f, 0.0f, 1.0f, 2.0f, 1.0f, 3.0f, 7.0f, 7.0f, 3.0f, - 2.0f, 3.0f, 1.0f, 7.0f, 2.0f, 2.0f, 2.0f, 9.0f, 3.0f, 6.0f, 1.0f, 8.0f, 0.0f, 1.0f, 2.0f, 0.0f, 9.0f, 5.0f}; - std::vector output_data = {4.206f, 4.441f, 4.265f, 4.706f, 3.147f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMean, true); -} - -TEST_F(TestReduceOpenCL, ReduceSumFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {18.0f, 22.0f, 26.0f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSum); -} - -TEST_F(TestReduceOpenCL, ReduceSumFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {18.0f, 22.0f, 26.0f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceSum); -} - -TEST_F(TestReduceOpenCL, ReduceSumLocalFp32) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 8.0f, 1.0f, 8.0f, 9.0f, 6.0f, 9.0f, 4.0f, 4.0f, 4.0f, 2.0f, 3.0f, 9.0f, 3.0f, 4.0f, 8.0f, 1.0f, 9.0f, - 5.0f, 2.0f, 5.0f, 6.0f, 3.0f, 8.0f, 3.0f, 7.0f, 1.0f, 3.0f, 1.0f, 9.0f, 4.0f, 0.0f, 9.0f, 7.0f, 7.0f, - 5.0f, 0.0f, 2.0f, 4.0f, 8.0f, 7.0f, 3.0f, 0.0f, 4.0f, 8.0f, 5.0f, 3.0f, 8.0f, 2.0f, 5.0f, 3.0f, 5.0f, - 9.0f, 4.0f, 3.0f, 9.0f, 7.0f, 2.0f, 4.0f, 7.0f, 0.0f, 3.0f, 9.0f, 6.0f, 6.0f, 9.0f, 2.0f, 1.0f, 0.0f, - 7.0f, 1.0f, 7.0f, 2.0f, 0.0f, 6.0f, 9.0f, 4.0f, 7.0f, 0.0f, 7.0f, 0.0f, 4.0f, 8.0f, 6.0f, 0.0f, 3.0f, - 2.0f, 1.0f, 
2.0f, 9.0f, 6.0f, 2.0f, 6.0f, 2.0f, 9.0f, 4.0f, 0.0f, 1.0f, 9.0f, 7.0f, 6.0f, 9.0f, 8.0f}; - std::vector output_data = {143.000f, 191.000f, 145.000f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSum); -} - -TEST_F(TestReduceOpenCL, ReduceSumLocalFp16) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 8.0f, 1.0f, 8.0f, 9.0f, 6.0f, 9.0f, 4.0f, 4.0f, 4.0f, 2.0f, 3.0f, 9.0f, 3.0f, 4.0f, 8.0f, 1.0f, 9.0f, - 5.0f, 2.0f, 5.0f, 6.0f, 3.0f, 8.0f, 3.0f, 7.0f, 1.0f, 3.0f, 1.0f, 9.0f, 4.0f, 0.0f, 9.0f, 7.0f, 7.0f, - 5.0f, 0.0f, 2.0f, 4.0f, 8.0f, 7.0f, 3.0f, 0.0f, 4.0f, 8.0f, 5.0f, 3.0f, 8.0f, 2.0f, 5.0f, 3.0f, 5.0f, - 9.0f, 4.0f, 3.0f, 9.0f, 7.0f, 2.0f, 4.0f, 7.0f, 0.0f, 3.0f, 9.0f, 6.0f, 6.0f, 9.0f, 2.0f, 1.0f, 0.0f, - 7.0f, 1.0f, 7.0f, 2.0f, 0.0f, 6.0f, 9.0f, 4.0f, 7.0f, 0.0f, 7.0f, 0.0f, 4.0f, 8.0f, 6.0f, 0.0f, 3.0f, - 2.0f, 1.0f, 2.0f, 9.0f, 6.0f, 2.0f, 6.0f, 2.0f, 9.0f, 4.0f, 0.0f, 1.0f, 9.0f, 7.0f, 6.0f, 9.0f, 8.0f}; - std::vector output_data = {143.000f, 191.000f, 145.000f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceSum); -} - -TEST_F(TestReduceOpenCL, ReduceSumWCFp32) { - int n = 1; - int h = 3; - int w = 2; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {6.0f, 22.0f, 38.0f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSum, true); -} - -TEST_F(TestReduceOpenCL, ReduceSumWCLocalFp32) { - int n = 1; - int h = 3; - int w = 5; - int c = 17; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 4.0f, 7.0f, 2.0f, 9.0f, 1.0f, 4.0f, 0.0f, 1.0f, 0.0f, 7.0f, 8.0f, 0.0f, 2.0f, 8.0f, 2.0f, 0.0f, 4.0f, 8.0f, 3.0f, - 9.0f, 5.0f, 9.0f, 7.0f, 0.0f, 3.0f, 3.0f, 1.0f, 1.0f, 8.0f, 6.0f, 4.0f, 7.0f, 6.0f, 
5.0f, 7.0f, 8.0f, 2.0f, 0.0f, - 0.0f, 4.0f, 1.0f, 1.0f, 4.0f, 6.0f, 0.0f, 5.0f, 1.0f, 0.0f, 3.0f, 9.0f, 3.0f, 7.0f, 8.0f, 1.0f, 6.0f, 9.0f, 2.0f, - 5.0f, 7.0f, 2.0f, 9.0f, 8.0f, 0.0f, 2.0f, 0.0f, 4.0f, 3.0f, 4.0f, 3.0f, 5.0f, 3.0f, 5.0f, 2.0f, 2.0f, 1.0f, 9.0f, - 8.0f, 7.0f, 0.0f, 8.0f, 0.0f, 4.0f, 0.0f, 8.0f, 4.0f, 8.0f, 2.0f, 6.0f, 3.0f, 7.0f, 6.0f, 8.0f, 3.0f, 6.0f, 4.0f, - 8.0f, 3.0f, 8.0f, 1.0f, 0.0f, 9.0f, 6.0f, 4.0f, 9.0f, 0.0f, 6.0f, 8.0f, 6.0f, 7.0f, 8.0f, 2.0f, 3.0f, 3.0f, 7.0f, - 2.0f, 9.0f, 1.0f, 9.0f, 3.0f, 5.0f, 4.0f, 6.0f, 2.0f, 7.0f, 1.0f, 1.0f, 0.0f, 0.0f, 4.0f, 9.0f, 1.0f, 7.0f, 3.0f, - 2.0f, 1.0f, 4.0f, 6.0f, 7.0f, 9.0f, 2.0f, 2.0f, 8.0f, 3.0f, 2.0f, 4.0f, 1.0f, 7.0f, 6.0f, 8.0f, 6.0f, 9.0f, 8.0f, - 6.0f, 8.0f, 3.0f, 4.0f, 8.0f, 5.0f, 6.0f, 9.0f, 9.0f, 2.0f, 0.0f, 5.0f, 0.0f, 0.0f, 2.0f, 4.0f, 2.0f, 2.0f, 6.0f, - 9.0f, 3.0f, 6.0f, 0.0f, 5.0f, 4.0f, 3.0f, 8.0f, 6.0f, 3.0f, 2.0f, 8.0f, 9.0f, 2.0f, 7.0f, 1.0f, 2.0f, 4.0f, 9.0f, - 3.0f, 7.0f, 9.0f, 2.0f, 4.0f, 2.0f, 7.0f, 8.0f, 8.0f, 6.0f, 3.0f, 4.0f, 6.0f, 3.0f, 1.0f, 7.0f, 9.0f, 3.0f, 5.0f, - 9.0f, 7.0f, 1.0f, 8.0f, 6.0f, 1.0f, 9.0f, 2.0f, 8.0f, 2.0f, 9.0f, 8.0f, 3.0f, 2.0f, 7.0f, 8.0f, 9.0f, 3.0f, 6.0f, - 0.0f, 8.0f, 5.0f, 7.0f, 1.0f, 5.0f, 2.0f, 9.0f, 3.0f, 0.0f, 5.0f, 9.0f, 3.0f, 2.0f, 0.0f, 2.0f, 7.0f, 5.0f, 7.0f, - 4.0f, 7.0f, 0.0f, 9.0f, 8.0f, 8.0f, 8.0f, 8.0f}; - std::vector output_data = {344.000f, 395.000f, 434.000f}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSum, true); -} - -TEST_F(TestReduceOpenCL, ReduceMinFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {3.0f, -5.0f, 4.0f, 3.0f, -1.0f, 1.0f, -5.0f, -2.0f, -3.0f, 5.0f, -1.0f, 5.0f}; - std::vector output_data = {-5.000f, -5.000f, -3.000f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMin); -} - -TEST_F(TestReduceOpenCL, ReduceMinFp16) { - int n = 1; - int 
h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {3.0f, -5.0f, 4.0f, 3.0f, -1.0f, 1.0f, -5.0f, -2.0f, -3.0f, 5.0f, -1.0f, 5.0f}; - std::vector output_data = {-5.000f, -5.000f, -3.000f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceMin); -} - -TEST_F(TestReduceOpenCL, ReduceMinLocalFp32) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 2.0f, -8.0f, -4.0f, -7.0f, 7.0f, 3.0f, 7.0f, -3.0f, 2.0f, -9.0f, -6.0f, 3.0f, -8.0f, 1.0f, -10.0f, - 1.0f, -10.0f, 2.0f, -5.0f, 6.0f, -5.0f, 7.0f, 3.0f, 4.0f, 3.0f, -3.0f, 5.0f, -1.0f, -1.0f, -6.0f, - -4.0f, 9.0f, 5.0f, -1.0f, 3.0f, 3.0f, 9.0f, 5.0f, -10.0f, -1.0f, -8.0f, 9.0f, -4.0f, 8.0f, 3.0f, - -1.0f, -2.0f, 8.0f, -1.0f, -7.0f, 2.0f, 4.0f, 2.0f, 4.0f, 6.0f, -1.0f, 7.0f, 4.0f, -3.0f, 0.0f, - -2.0f, -1.0f, -10.0f, -2.0f, 6.0f, 3.0f, -4.0f, -9.0f, -5.0f, -8.0f, 0.0f, -7.0f, 9.0f, 2.0f, 7.0f, - -5.0f, 8.0f, 4.0f, 5.0f, 9.0f, -3.0f, 2.0f, 0.0f, -4.0f, -1.0f, -7.0f, -10.0f, -10.0f, -3.0f, 9.0f, - -8.0f, 1.0f, 1.0f, -5.0f, -10.0f, -1.0f, 8.0f, -2.0f, 1.0f, -4.0f, 1.0f, 0.0f}; - std::vector output_data = {-10.000f, -10.000f, -10.000f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMin); -} - -TEST_F(TestReduceOpenCL, ReduceMinLocalFp16) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 2.0f, -8.0f, -4.0f, -7.0f, 7.0f, 3.0f, 7.0f, -3.0f, 2.0f, -9.0f, -6.0f, 3.0f, -8.0f, 1.0f, -10.0f, - 1.0f, -10.0f, 2.0f, -5.0f, 6.0f, -5.0f, 7.0f, 3.0f, 4.0f, 3.0f, -3.0f, 5.0f, -1.0f, -1.0f, -6.0f, - -4.0f, 9.0f, 5.0f, -1.0f, 3.0f, 3.0f, 9.0f, 5.0f, -10.0f, -1.0f, -8.0f, 9.0f, -4.0f, 8.0f, 3.0f, - -1.0f, -2.0f, 8.0f, -1.0f, -7.0f, 2.0f, 4.0f, 2.0f, 4.0f, 6.0f, -1.0f, 7.0f, 4.0f, -3.0f, 0.0f, - -2.0f, -1.0f, -10.0f, -2.0f, 6.0f, 3.0f, -4.0f, -9.0f, -5.0f, -8.0f, 
0.0f, -7.0f, 9.0f, 2.0f, 7.0f, - -5.0f, 8.0f, 4.0f, 5.0f, 9.0f, -3.0f, 2.0f, 0.0f, -4.0f, -1.0f, -7.0f, -10.0f, -10.0f, -3.0f, 9.0f, - -8.0f, 1.0f, 1.0f, -5.0f, -10.0f, -1.0f, 8.0f, -2.0f, 1.0f, -4.0f, 1.0f, 0.0f}; - std::vector output_data = {-10.000f, -10.000f, -10.000f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceMin); -} - -TEST_F(TestReduceOpenCL, ReduceMinWCFp32) { - int n = 1; - int h = 3; - int w = 2; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = {-0.080f, 0.481f, -0.853f, -0.838f, 0.557f, 0.255f, - 0.116f, 0.446f, -0.051f, -0.095f, 0.552f, 0.077f}; - std::vector output_data = {-0.853f, 0.116f, -0.095f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMin, true); -} - -TEST_F(TestReduceOpenCL, ReduceMinWCLocalFp32) { - int n = 1; - int h = 5; - int w = 17; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 0.399f, -0.139f, 0.238f, 0.779f, -0.894f, 0.343f, -0.955f, 0.593f, 0.448f, 0.816f, 0.841f, -0.614f, 0.636f, - 0.116f, -0.031f, -0.109f, 0.770f, 0.962f, 0.307f, -0.170f, 0.789f, 0.197f, 0.530f, -0.883f, 0.753f, 0.385f, - -0.158f, 0.237f, 0.971f, -0.781f, -0.523f, -0.547f, 0.257f, -0.034f, 0.660f, -0.666f, -0.379f, 0.092f, -0.130f, - 0.369f, 0.664f, -0.747f, -0.687f, -0.628f, -0.434f, 0.736f, 0.673f, 0.125f, -0.854f, 0.007f, 0.038f, 0.024f, - 0.706f, -0.806f, 0.042f, 0.532f, -0.545f, -0.942f, 0.778f, -0.419f, 0.931f, -0.848f, 0.501f, -0.415f, -0.292f, - -0.575f, 0.192f, -0.825f, 0.256f, -0.227f, -0.795f, 0.319f, 0.101f, -0.337f, 0.940f, -0.724f, 0.453f, -0.646f, - -0.225f, -0.303f, 0.093f, 0.851f, -0.467f, -0.657f, 0.980f, 0.867f, 0.606f, 0.356f, 0.982f, -0.199f, 0.816f, - 0.984f, -0.466f, -0.857f, -0.070f, -0.562f, 0.744f, 0.477f, 0.831f, -0.064f, 0.891f, -0.813f, -0.341f, 0.969f, - 0.538f, 0.233f, -0.545f, 0.994f, 0.241f, -0.829f, -0.272f, -0.420f, 0.607f, 0.658f, -0.188f, 0.134f, 0.277f, - 
-0.173f, 0.373f, 0.286f, -0.805f, 0.455f, 0.461f, 0.893f, -0.457f, 0.360f, -0.706f, -0.848f, 0.032f, -0.566f, - 0.014f, 0.507f, -0.694f, -0.663f, -0.783f, 0.459f, -0.613f, -0.496f, 0.332f, 0.829f, -0.437f, 0.759f, -0.061f, - -0.400f, -0.561f, 0.471f, -0.042f, 0.073f, 0.546f, -0.557f, 0.602f, 0.011f, -0.214f, 0.733f, 0.289f, -0.847f, - -0.637f, -0.791f, 0.519f, 0.449f, -0.390f, -0.296f, 0.622f, 0.345f, 0.525f, -0.205f, -0.626f, 0.089f, -0.811f, - 0.741f}; - std::vector output_data = {-0.955f, -0.942f, -0.857f, -0.848f, -0.847f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMin, true); -} - -TEST_F(TestReduceOpenCL, ReduceMaxFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.123f, 0.975f, 0.092f, 0.364f, 0.033f, -0.140f, - -0.566f, 0.693f, 0.540f, -0.588f, -0.992f, -0.386f}; - std::vector output_data = {0.364f, 0.975f, 0.540f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMax); -} - -TEST_F(TestReduceOpenCL, ReduceMaxFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.123f, 0.975f, 0.092f, 0.364f, 0.033f, -0.140f, - -0.566f, 0.693f, 0.540f, -0.588f, -0.992f, -0.386f}; - std::vector output_data = {0.364f, 0.975f, 0.540f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceMax); -} - -TEST_F(TestReduceOpenCL, ReduceMaxLocalFp32) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 0.113f, -0.633f, 0.603f, 0.447f, -0.588f, 0.039f, 0.494f, -0.379f, -0.018f, -0.317f, 0.620f, 0.460f, 0.732f, - 0.980f, 0.376f, 0.481f, -0.371f, -0.219f, -0.496f, 0.670f, -0.159f, 0.961f, 0.036f, 0.633f, -0.118f, -0.300f, - 0.971f, -0.236f, -0.095f, -0.705f, -0.495f, -0.403f, -0.131f, -0.084f, -0.339f, 0.031f, -0.582f, 
0.893f, -0.311f, - 0.501f, -0.623f, -0.523f, -0.177f, -0.438f, 0.626f, 0.028f, -0.106f, 0.916f, -0.504f, 0.678f, 0.358f, -0.951f, - 0.741f, -0.577f, -0.544f, -0.952f, -0.133f, 0.441f, -0.376f, -0.246f, 0.301f, 0.025f, -0.904f, -0.337f, 0.132f, - -0.800f, 0.226f, -0.135f, -0.617f, -0.871f, -0.393f, -0.195f, 0.591f, 0.034f, -0.040f, 0.377f, -0.106f, 0.265f, - -0.883f, -0.678f, -0.795f, -0.094f, -0.272f, -0.954f, 0.569f, -0.910f, -0.288f, -0.978f, 0.262f, -0.973f, -0.750f, - 0.460f, 0.956f, 0.696f, -0.938f, 0.537f, 0.516f, -0.339f, -0.289f, 0.498f, 0.135f, -0.649f}; - std::vector output_data = {0.961f, 0.980f, 0.971f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMax); -} - -TEST_F(TestReduceOpenCL, ReduceMaxLocalFp16) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 0.314f, -0.714f, -0.736f, -0.459f, -0.819f, -0.530f, -0.275f, -0.141f, -0.797f, 0.522f, -0.651f, 0.576f, -0.644f, - 0.725f, 0.208f, -0.529f, -0.776f, 0.986f, -0.862f, -0.327f, 0.922f, 0.554f, -0.401f, 0.972f, -0.485f, 0.423f, - -0.611f, -0.768f, 0.444f, -0.678f, -0.734f, 0.572f, 0.413f, 0.612f, -0.783f, -0.138f, -0.624f, -0.284f, 0.873f, - -0.298f, 0.630f, -0.463f, 0.195f, 0.196f, 0.167f, 0.227f, -0.015f, 0.436f, -0.898f, 0.031f, -0.149f, -0.218f, - 0.184f, -0.426f, 0.794f, 0.846f, 0.624f, -0.889f, -0.336f, 0.401f, -0.820f, -0.583f, 0.337f, 0.175f, 0.228f, - -0.626f, -0.505f, -0.088f, 0.833f, -0.366f, 0.392f, 0.727f, -0.598f, -0.851f, 0.007f, -0.707f, 0.575f, 0.243f, - -0.372f, -0.141f, 0.679f, -0.646f, 0.422f, 0.322f, -0.294f, 0.831f, 0.929f, -0.414f, -0.208f, -0.111f, 0.146f, - -0.489f, -0.808f, -0.635f, 0.811f, 0.544f, -0.131f, 0.707f, 0.787f, 0.603f, -0.149f, -0.095f}; - std::vector output_data = {0.794f, 0.846f, 0.986f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceMax); + return reinterpret_cast(param); } +} // namespace 
-TEST_F(TestReduceOpenCL, ReduceMaxWCFp32) { - int n = 1; - int h = 3; - int w = 2; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = {0.435f, -0.949f, 0.580f, 0.858f, -0.465f, 0.255f, - -0.561f, -0.444f, -0.603f, 0.266f, 0.031f, -0.638f}; - std::vector output_data = {0.858f, 0.255f, 0.266f}; +TEST_F(TestOpenCL_Reduce, Mean) { + std::vector axis = {1, 2}; + std::vector input_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 3}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + float output_data[] = {4.5, 5.5, 6.5f}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMax, true); -} - -TEST_F(TestReduceOpenCL, ReduceMaxWCLocalFp32) { - int n = 1; - int h = 5; - int w = 17; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 0.543f, 0.620f, 0.175f, -0.275f, -0.570f, 0.516f, -0.401f, -0.157f, 0.460f, -0.072f, -0.322f, 0.208f, 0.385f, - 0.919f, -0.265f, 0.256f, 0.383f, -0.399f, 0.183f, 0.363f, -0.779f, -0.191f, -0.446f, 0.063f, -0.671f, 0.823f, - -0.049f, -0.182f, -0.409f, 0.589f, -0.804f, -0.461f, -0.407f, -0.119f, 0.833f, 0.718f, -0.366f, 0.993f, 0.844f, - -0.018f, -0.203f, -0.004f, -0.610f, -0.461f, 0.938f, -0.708f, -0.831f, -0.147f, 0.855f, 0.998f, 0.412f, -0.393f, - -0.706f, -0.127f, 0.845f, -0.236f, -0.341f, 0.299f, 0.793f, 0.794f, -0.634f, -0.663f, -0.568f, -0.428f, -0.921f, - 0.904f, 0.933f, -0.985f, -0.760f, -0.673f, -0.080f, 0.235f, 0.539f, -0.341f, -0.899f, 0.527f, -0.210f, -0.151f, - 0.148f, -0.184f, -0.103f, -0.345f, -0.772f, -0.960f, -0.282f, -0.486f, -0.986f, -0.591f, 0.702f, 0.973f, 0.269f, - 0.058f, -0.831f, -0.677f, -0.665f, -0.403f, 0.241f, -0.365f, 0.741f, 0.603f, 0.347f, 0.812f, -0.515f, -0.085f, - 0.251f, 0.631f, 0.819f, 0.622f, -0.615f, -0.122f, 0.064f, 0.445f, -0.508f, -0.023f, -0.072f, -0.423f, 0.547f, - -0.841f, -0.308f, 0.924f, -0.187f, 0.601f, 0.879f, -0.868f, 0.395f, -0.307f, 0.977f, -0.300f, 0.737f, 0.022f, - 0.106f, 
-0.520f, -0.673f, -0.351f, 0.367f, 0.588f, -0.223f, 0.062f, 0.870f, -0.017f, 0.583f, 0.405f, 0.507f, - -0.457f, 0.196f, 0.048f, -0.173f, 0.596f, -0.017f, -0.245f, -0.433f, -0.852f, 0.058f, 0.237f, 0.280f, -0.129f, - -0.224f, 0.869f, -0.781f, -0.029f, -0.715f, 0.497f, -0.341f, 0.230f, -0.572f, 0.718f, -0.408f, -0.998f, -0.752f, - -0.701f}; - std::vector output_data = {0.919f, 0.998f, 0.973f, 0.977f, 0.870f}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceMax, true); -} - -TEST_F(TestReduceOpenCL, ReduceProdFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {4.0f, 3.0f, 1.0f, 4.0f, 1.0f, 3.0f, 1.0f, 4.0f, 2.0f, 4.0f, 4.0f, 3.0f}; - std::vector output_data = {64.0f, 48.0f, 18.0f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceProd); -} - -TEST_F(TestReduceOpenCL, ReduceProdFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {2.0f, 1.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, 3.0f, 2.0f, 3.0f, 1.0f, 1.0f}; - std::vector output_data = {24.0f, 12.0f, 6.0f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceProd); -} - -TEST_F(TestReduceOpenCL, ReduceProdLocalFp32) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 0.304f, 2.304f, 1.391f, 1.072f, 0.351f, 0.641f, 0.120f, 2.382f, 0.460f, 1.672f, 0.553f, 1.534f, 1.423f, - 0.892f, 2.900f, 1.953f, 1.745f, 1.171f, 1.717f, 1.291f, 1.572f, 2.388f, 0.154f, 0.252f, 0.794f, 0.981f, - 0.366f, 1.372f, 1.778f, 1.848f, 1.023f, 1.124f, 2.045f, 2.374f, 1.965f, 0.260f, 1.306f, 1.889f, 1.144f, - 1.816f, 2.189f, 2.215f, 1.913f, 2.577f, 2.910f, 1.712f, 0.342f, 1.349f, 0.215f, 2.717f, 1.813f, 2.764f, - 1.989f, 1.710f, 0.156f, 2.293f, 2.648f, 1.281f, 1.078f, 2.757f, 0.746f, 0.238f, 0.235f, 
0.123f, 0.730f, - 1.558f, 1.798f, 0.993f, 2.479f, 1.930f, 1.687f, 1.078f, 0.600f, 0.710f, 1.926f, 0.848f, 0.984f, 0.568f, - 0.983f, 1.068f, 2.362f, 2.770f, 2.184f, 2.883f, 1.177f, 0.232f, 0.782f, 1.340f, 2.029f, 1.524f, 0.159f, - 2.892f, 1.225f, 0.638f, 2.537f, 0.813f, 0.337f, 1.871f, 0.602f, 2.387f, 1.209f, 2.886f}; - std::vector output_data = {0.103f, 229.081f, 1030.031f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceProd); -} - -TEST_F(TestReduceOpenCL, ReduceProdLocalFp16) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 2.843f, 2.398f, 0.998f, 1.164f, 1.048f, 0.880f, 2.112f, 1.354f, 2.892f, 0.755f, 2.033f, 1.140f, 1.117f, - 2.550f, 2.340f, 2.905f, 0.114f, 0.773f, 2.589f, 2.404f, 1.037f, 0.561f, 2.671f, 0.419f, 1.723f, 2.041f, - 2.888f, 2.440f, 1.668f, 0.821f, 0.918f, 1.251f, 1.141f, 2.497f, 0.408f, 2.384f, 0.457f, 2.754f, 0.624f, - 0.198f, 0.599f, 2.566f, 1.279f, 2.973f, 0.363f, 2.222f, 1.144f, 2.715f, 1.135f, 0.900f, 1.906f, 0.982f, - 2.211f, 2.113f, 0.585f, 1.766f, 1.612f, 1.796f, 0.607f, 1.121f, 1.277f, 2.600f, 1.446f, 1.467f, 1.828f, - 2.227f, 0.950f, 2.702f, 1.297f, 0.552f, 2.476f, 1.404f, 2.487f, 0.615f, 0.205f, 0.577f, 0.809f, 1.432f, - 1.668f, 2.243f, 2.711f, 2.221f, 0.183f, 2.964f, 1.174f, 0.928f, 2.703f, 0.427f, 0.410f, 1.436f, 1.427f, - 1.144f, 2.970f, 2.014f, 2.380f, 1.286f, 2.570f, 2.765f, 1.757f, 0.513f, 2.449f, 0.770f}; - std::vector output_data = {715.940f, 12232.266f, 46763.609f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceProd); -} - -TEST_F(TestReduceOpenCL, ReduceProdWCFp32) { - int n = 1; - int h = 3; - int w = 2; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = {1.691f, 2.804f, 0.184f, 1.760f, 0.255f, 1.461f, - 2.751f, 2.487f, 1.304f, 0.686f, 0.702f, 0.393f}; - std::vector output_data = {1.536f, 2.549f, 0.247f}; - - 
RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceProd, true); -} - -TEST_F(TestReduceOpenCL, ReduceProdWCLocalFp32) { - int n = 1; - int h = 5; - int w = 17; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 1.360f, 0.615f, 0.894f, 1.357f, 0.701f, 1.430f, 1.488f, 0.701f, 0.688f, 0.869f, 1.321f, 0.836f, 1.160f, 1.460f, - 1.215f, 1.157f, 0.855f, 0.992f, 0.724f, 0.741f, 0.921f, 1.496f, 1.285f, 1.040f, 0.695f, 1.264f, 0.998f, 0.925f, - 1.170f, 1.384f, 1.413f, 0.617f, 0.743f, 1.299f, 0.998f, 1.131f, 1.491f, 1.371f, 0.808f, 1.001f, 0.602f, 0.812f, - 1.299f, 1.500f, 0.867f, 0.970f, 1.174f, 0.887f, 1.409f, 1.144f, 0.969f, 1.303f, 1.154f, 0.796f, 0.952f, 1.347f, - 0.794f, 0.601f, 1.191f, 1.310f, 0.619f, 0.961f, 0.951f, 1.395f, 0.861f, 1.177f, 1.274f, 0.701f, 0.758f, 0.635f, - 1.256f, 1.450f, 0.900f, 1.313f, 1.401f, 0.904f, 0.835f, 0.767f, 1.258f, 1.467f, 1.278f, 0.652f, 0.731f, 0.648f, - 1.308f, 1.199f, 1.485f, 1.352f, 0.639f, 1.291f, 0.924f, 0.762f, 0.791f, 1.392f, 1.328f, 1.190f, 1.458f, 1.193f, - 1.109f, 1.098f, 1.117f, 1.197f, 1.097f, 0.879f, 1.175f, 0.723f, 1.260f, 1.454f, 0.703f, 0.729f, 1.467f, 0.918f, - 0.631f, 0.750f, 1.292f, 1.208f, 0.972f, 0.621f, 0.673f, 0.710f, 1.482f, 1.092f, 1.162f, 1.432f, 0.774f, 1.132f, - 1.258f, 0.761f, 0.799f, 1.071f, 1.099f, 1.484f, 0.674f, 0.916f, 0.684f, 0.842f, 1.412f, 0.956f, 1.199f, 0.969f, - 0.957f, 1.124f, 0.937f, 0.815f, 1.308f, 1.448f, 1.059f, 1.373f, 0.804f, 1.172f, 1.387f, 0.826f, 0.783f, 0.707f, - 1.159f, 0.927f, 0.602f, 0.932f, 1.024f, 1.266f, 0.885f, 0.920f, 1.120f, 0.973f, 0.964f, 1.365f, 0.926f, 0.709f, - 1.177f, 0.615f}; - std::vector output_data = {1.544f, 1.984f, 5.516f, 0.247f, 0.919f}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceProd, true); -} - -TEST_F(TestReduceOpenCL, ReduceSumSquareFp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - 
std::vector input_data = {-0.081f, 0.305f, -0.291f, 0.777f, 0.338f, 0.482f, - 0.959f, -0.695f, -0.055f, 0.001f, 0.723f, -0.112f}; - std::vector output_data = {1.530f, 1.213f, 0.333f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSumSquare); -} - -TEST_F(TestReduceOpenCL, ReduceSumSquareFp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = {-0.730f, -0.938f, 0.236f, -0.631f, -0.058f, -0.625f, - 0.097f, -0.343f, 0.120f, -0.339f, 0.003f, -0.288f}; - std::vector output_data = {1.055f, 1.001f, 0.544f}; - - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceSumSquare); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis, schema::ReduceMode_ReduceMean, false); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestReduceOpenCL, ReduceSumSquareLocalFp32) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 0.025f, -0.130f, 0.292f, 0.128f, 0.360f, -0.181f, -0.179f, 0.469f, 0.434f, -0.417f, -0.414f, 0.998f, 0.654f, - -0.102f, 0.039f, -0.822f, -0.155f, 0.113f, 0.204f, 0.615f, 0.844f, -0.364f, 0.486f, 0.799f, 0.452f, -0.884f, - -0.006f, 0.888f, -0.567f, 0.620f, -0.365f, -0.096f, -0.300f, -0.263f, 0.945f, -0.900f, -0.798f, -0.536f, -0.506f, - 0.148f, -0.496f, 0.344f, 0.096f, 0.881f, -0.848f, 0.401f, -0.724f, 0.806f, -0.550f, 0.377f, 0.560f, -0.144f, - 0.439f, 0.038f, -0.985f, 0.246f, 0.233f, -0.864f, 0.427f, -0.723f, 0.592f, -0.642f, 0.376f, 0.769f, 0.020f, - 0.965f, 0.532f, -0.448f, -0.168f, 0.502f, 0.900f, 0.468f, 0.834f, -0.768f, -0.337f, 0.874f, 0.941f, -0.449f, - -0.330f, 0.605f, 0.081f, 0.804f, -0.823f, -0.270f, 0.117f, 0.040f, 0.316f, 0.951f, -0.920f, 0.599f, 0.855f, - 0.075f, -0.898f, -0.298f, 0.208f, 0.899f, 0.751f, -0.421f, 0.478f, 
-0.106f, -0.031f, 0.974f}; - std::vector output_data = {11.569f, 10.620f, 11.552f}; +TEST_F(TestOpenCL_Reduce, Sum) { + std::vector axis = {1, 2}; + std::vector input_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 3}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + float output_data[] = {18, 22, 26}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSumSquare); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis, schema::ReduceMode_ReduceSum, false); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestReduceOpenCL, ReduceSumSquareLocalFp16) { - int n = 1; - int h = 17; - int w = 2; - int c = 3; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - 0.931f, 0.611f, 0.921f, -0.873f, 0.084f, -0.677f, -0.366f, -0.627f, -0.359f, 0.217f, -0.825f, -0.453f, 0.486f, - 0.675f, -0.968f, 0.070f, 0.300f, -0.508f, -0.423f, -0.741f, -0.390f, 0.649f, -0.313f, -0.921f, -0.130f, -0.212f, - -0.591f, 0.135f, -0.556f, -0.963f, -0.509f, -0.480f, 0.694f, -0.913f, 0.778f, 0.498f, -0.520f, 0.271f, 0.087f, - 0.265f, 0.905f, 0.669f, 0.257f, -0.307f, 0.789f, 0.117f, 0.468f, 0.728f, 0.372f, -0.475f, 0.195f, 0.163f, - 0.766f, -0.504f, 0.876f, -0.203f, 0.636f, -0.340f, -0.126f, 0.368f, -0.173f, -0.149f, 0.492f, -0.220f, 0.521f, - -0.844f, -0.684f, -0.718f, 0.255f, -0.148f, -0.891f, 0.577f, -0.880f, 0.005f, -0.904f, 0.282f, 0.473f, -0.512f, - -0.385f, -0.674f, 0.443f, -0.172f, 0.224f, 0.720f, -0.050f, 0.003f, -0.743f, 0.025f, 0.941f, 0.107f, 0.176f, - -0.360f, 0.975f, -0.781f, -0.727f, 0.274f, 0.214f, -0.330f, 0.237f, 0.967f, 0.156f, -0.587f}; - std::vector output_data = {8.472f, 9.920f, 13.418f}; +TEST_F(TestOpenCL_Reduce, MeanWC) { + std::vector axis = {2, 3}; + std::vector input_shape = {1, 3, 2, 2}; + std::vector output_shape = {1, 3, 1, 1}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11}; + float output_data[] = {1.5, 5.5, 9.5f}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), true, schema::ReduceMode_ReduceSumSquare); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis, schema::ReduceMode_ReduceMean, true); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestReduceOpenCL, ReduceSumSquareWCFp32) { - int n = 1; - int h = 3; - int w = 2; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = {-0.686f, 0.613f, -0.701f, 0.978f, 0.632f, 0.677f, - 0.780f, -0.888f, 0.147f, 0.448f, -0.100f, 0.936f}; - std::vector output_data = {2.294f, 2.255f, 1.108f}; +TEST_F(TestOpenCL_Reduce, SumWC) { + std::vector axis = {2, 3}; + std::vector input_shape = {1, 3, 2, 2}; + std::vector output_shape = {1, 3, 1, 1}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + float output_data[] = {6, 22, 38}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSumSquare, true); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis, schema::ReduceMode_ReduceSum, true); + TestMain({{input_shape, input_data, VAR, kNumberTypeFloat32}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestReduceOpenCL, ReduceSumSquareWCLocalFp32) { - int n = 1; - int h = 5; - int w = 17; - int c = 2; - std::vector shape = {n, h, w, c}; - std::vector input_data = { - -0.309f, -0.836f, 0.749f, -0.820f, -0.715f, -0.770f, 0.030f, -0.817f, 0.009f, 0.146f, 0.642f, 0.382f, -0.085f, - -0.268f, -0.424f, -0.957f, -0.127f, -0.852f, 0.596f, 0.340f, -0.492f, -0.374f, -0.669f, 0.665f, -0.664f, -0.079f, - 0.462f, 0.469f, 0.187f, -0.730f, -0.240f, -0.446f, 0.254f, 0.284f, 0.743f, 0.297f, 0.235f, -0.068f, 0.652f, - -0.474f, -0.749f, -0.499f, 0.106f, -0.988f, 0.033f, -0.327f, -0.050f, -0.228f, -0.676f, -0.136f, -0.801f, 0.885f, - -0.108f, -0.019f, -0.092f, 0.538f, 0.760f, 
0.996f, -0.610f, 0.125f, 0.296f, 0.861f, 0.811f, 0.948f, -0.665f, - 0.920f, 0.669f, 0.572f, -0.653f, -0.823f, -0.967f, -0.094f, 0.078f, 0.458f, 0.954f, -0.357f, 0.887f, -0.194f, - -0.453f, -0.774f, -0.805f, -0.064f, -0.671f, -0.151f, -0.910f, 0.695f, 0.762f, 0.755f, -0.933f, 0.277f, -0.697f, - 0.074f, -0.333f, 0.790f, -0.370f, 0.264f, -0.649f, 0.570f, 0.933f, 0.714f, 0.296f, -0.430f, 0.634f, 0.619f, - -0.744f, -0.898f, -0.908f, -0.800f, 0.500f, -0.688f, 0.816f, 0.901f, 0.054f, 0.993f, 0.346f, -0.285f, -0.926f, - 0.746f, -0.718f, 0.708f, -0.193f, 0.838f, -0.869f, -0.189f, -0.195f, -0.324f, -0.498f, -0.216f, 0.632f, -0.701f, - 0.272f, 0.550f, 0.486f, -0.415f, 0.285f, 0.617f, 0.740f, 0.170f, 0.486f, 0.251f, -0.165f, -0.424f, 0.705f, - -0.802f, -0.977f, -0.449f, 0.502f, -0.406f, 0.125f, -0.643f, -0.324f, -0.409f, 0.218f, 0.719f, -0.043f, -0.933f, - -0.580f, 0.830f, -0.091f, 0.998f, -0.458f, 0.142f, -0.220f, -0.440f, 0.824f, -0.349f, 0.983f, -0.546f, 0.085f, - 0.235f}; - std::vector output_data = {9.889f, 11.926f, 13.296f, 13.537f, 10.563f}; - RunTestCaseReduce(shape, input_data.data(), output_data.data(), false, schema::ReduceMode_ReduceSumSquare, true); -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc index dfe91c1ef3..e95af71b0d 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/reshape_tests.cc @@ -13,169 +13,91 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/reshape_parameter.h" -namespace mindspore { -class TestReshapeOpenCL : public mindspore::CommonTest { - public: - TestReshapeOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseReshape(const std::vector &shape_in, const std::vector &shape_out, void *input_data, - void *output_data, bool enable_fp16) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - shape_in, schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - bool is_output_2d = shape_out.size() == 2; - auto tensor_out_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), shape_out, - is_output_2d ? 
schema::Format_NC : schema::Format_NHWC); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto arith_kernel = kernel::OpenCLKernelCreator(inputs, outputs, nullptr, nullptr, - kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } - - inputs[0]->MallocData(allocator); - - std::vector kernels{arith_kernel}; - auto pGraph_ptr = std::make_unique(inputs, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); - pGraph->Run(); - - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); - } - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } +class TestOpenCL_Reshape : public CommonTest {}; - MS_LOG(INFO) << "Test Reshape passed"; +namespace { +// PrimitiveType_Reshape: src/ops/populate/reshape_populate.cc +OpParameter *CreateParameter() { + auto *param = test::CreateParameter(schema::PrimitiveType_Reshape); + return reinterpret_cast(param); } +} // namespace -TEST_F(TestReshapeOpenCL, ReshapeFp32) { +TEST_F(TestOpenCL_Reshape, 4D_2D_test0) { std::vector shape_in = {1, 1, 1, 7}; std::vector shape_out = {1, 7}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); -} - -TEST_F(TestReshapeOpenCL, ReshapeFp16) { - std::vector shape_in 
= {1, 1, 1, 7}; - std::vector shape_out = {1, 7}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), true); -} - -TEST_F(TestReshapeOpenCL, Reshape4DFp32) { - std::vector shape_in = {1, 2, 2, 3}; - std::vector shape_out = {1, 1, 4, 3}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); + float input_data[] = {0, 1, 2, 3, 4, 5, 6}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6}; + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -TEST_F(TestReshapeOpenCL, Reshape4DFp16) { +TEST_F(TestOpenCL_Reshape, 4D_4D_test0) { std::vector shape_in = {1, 2, 2, 3}; std::vector shape_out = {1, 1, 4, 3}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), true); + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -TEST_F(TestReshapeOpenCL, Reshape4D2DFp32) { +TEST_F(TestOpenCL_Reshape, 4D_2D_test1) { std::vector shape_in = {1, 2, 2, 4}; std::vector shape_out = {4, 4}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}; - std::vector 
output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem01Test0) { + +TEST_F(TestOpenCL_Reshape, 4D_4D_test1) { std::vector shape_in = {1, 4, 2, 3}; std::vector shape_out = {1, 3, 2, 4}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem01Test1) { + +TEST_F(TestOpenCL_Reshape, 4D_4D_test2) { std::vector shape_in = {1, 2, 2, 5}; std::vector shape_out = {1, 1, 5, 4}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 
15.0f, 16.0f, 17.0f, 18.0f, 19.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}; + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem01Test2) { + +TEST_F(TestOpenCL_Reshape, 4D_4D_test3) { std::vector shape_in = {1, 4, 2, 5}; std::vector shape_out = {1, 2, 5, 4}; - std::vector input_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, - 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, + float input_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, }; - std::vector output_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, - 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, + float output_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, }; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem10) { + +TEST_F(TestOpenCL_Reshape, 4D_4D_test4) { std::vector shape_in = {1, 5, 5, 8}; 
std::vector shape_out = {8, 1, 5, 5}; - std::vector input_data = { + float input_data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, @@ -185,7 +107,7 @@ TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem10) { 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199}; - std::vector output_data = { + float output_data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, @@ -196,19 +118,21 @@ TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem10) { 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199}; - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -TEST_F(TestReshapeOpenCL, Reshape4DFp32Rem11) { + +TEST_F(TestOpenCL_Reshape, 4D_4D_test5) { std::vector shape_in = {1, 3, 2, 5}; std::vector shape_out = {1, 5, 2, 3}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, - 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f}; - std::vector output_data = 
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, - 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, - 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f}; - - RunTestCaseReshape(shape_in, shape_out, input_data.data(), output_data.data(), false); + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29}; + for (auto fp16_enable : {false, true}) { + TestMain({{shape_in, input_data, VAR}}, {shape_out, output_data}, CreateParameter(), fp16_enable); + } } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/resize_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/resize_tests.cc index 137cdd5c61..26f550c886 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/resize_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/resize_tests.cc @@ -13,169 +13,72 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "common/common_test.h" -#include "src/common/file_utils.h" -#include "src/common/log_adapter.h" -#include "src/runtime/kernel/opencl/kernel/resize.h" -#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "src/runtime/opencl/opencl_runtime.h" -#include "test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/resize_parameter.h" -namespace mindspore { -class TestResizeOpenCL : public mindspore::CommonTest { - public: - TestResizeOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseResize(const std::vector &shape, void *input_data, void *output_data, bool enable_fp16, - int resize_mode, bool align_corners) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - auto param = static_cast(malloc(sizeof(ResizeParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - int n = shape[0]; - int h = shape[1]; - int w = shape[2]; - int oh = shape[3]; - int ow = shape[4]; - int c = shape[5]; - param->new_height_ = oh; - param->new_width_ = ow; - param->align_corners_ = align_corners; - param->method_ = resize_mode; - std::vector input_shape = {n, h, w, c}; - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - input_shape, schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - std::vector out_shape = {n, oh, ow, c}; - auto tensor_out_ptr = std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), - out_shape, schema::Format_NHWC); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto arith_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } - - inputs[0]->MallocData(allocator); +class TestOpenCL_Resize : public CommonTest {}; - std::vector kernels{arith_kernel}; - auto pGraph_ptr = std::make_unique(inputs, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); - pGraph->Run(); - - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); - } - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } - - MS_LOG(INFO) << "Test Resize passed"; +namespace { +// PrimitiveType_Resize: src/ops/populate/resize_populate.cc +OpParameter *CreateParameter(schema::ResizeMethod method, int new_height, int new_width, bool align_corners) { + auto *param = test::CreateParameter(schema::PrimitiveType_Resize); + param->new_height_ = new_height; + param->new_width_ = new_width; + param->align_corners_ = align_corners; + param->method_ = method; + param->preserve_aspect_ratio_ = false; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestResizeOpenCL, ResizeBilinearFp32) { - int n = 1; - int h = 2; - int w = 2; +TEST_F(TestOpenCL_Resize, Bilinear) { + schema::ResizeMethod method 
= schema::ResizeMethod_LINEAR; int oh = 4; int ow = 4; - int c = 1; bool align_corners = false; - std::vector shape = {n, h, w, oh, ow, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 0.5f, 1.0f, 1.0f, 1.0f, 1.5f, 2.0f, 2.0f, - 2.0f, 2.5f, 3.0f, 3.0f, 2.0f, 2.5f, 3.0f, 3.0f}; - RunTestCaseResize(shape, input_data.data(), output_data.data(), false, schema::ResizeMethod_LINEAR, align_corners); -} -TEST_F(TestResizeOpenCL, ResizeBilinearFp16) { - int n = 1; - int h = 2; - int w = 2; - int oh = 4; - int ow = 4; - int c = 1; - bool align_corners = false; - std::vector shape = {n, h, w, oh, ow, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 0.5f, 1.0f, 1.0f, 1.0f, 1.5f, 2.0f, 2.0f, - 2.0f, 2.5f, 3.0f, 3.0f, 2.0f, 2.5f, 3.0f, 3.0f}; - RunTestCaseResize(shape, input_data.data(), output_data.data(), true, schema::ResizeMethod_LINEAR, align_corners); + std::vector input_shape = {1, 2, 2, 1}; + std::vector output_shape = {1, oh, ow, 1}; + float input_data[] = {0, 1, 2, 3}; + float output_data[] = {0, 0.5, 1, 1, 1, 1.5, 2, 2, 2, 2.5, 3, 3, 2, 2.5, 3, 3}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(method, oh, ow, align_corners); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestResizeOpenCL, ResizeBilinearAlignFp32) { - int n = 1; - int h = 2; - int w = 2; +TEST_F(TestOpenCL_Resize, Bilinear_AlignCorners) { + schema::ResizeMethod method = schema::ResizeMethod_LINEAR; int oh = 3; int ow = 3; - int c = 1; bool align_corners = true; - std::vector shape = {n, h, w, oh, ow, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 0.5f, 1.0f, 1.0f, 1.5f, 2.0f, 2.0f, 2.5f, 3.0f}; - RunTestCaseResize(shape, input_data.data(), output_data.data(), false, schema::ResizeMethod_LINEAR, align_corners); -} -TEST_F(TestResizeOpenCL, ResizeNearestNeighborFp32) { - int n = 1; - 
int h = 2; - int w = 2; - int oh = 4; - int ow = 4; - int c = 1; - bool align_corners = false; - std::vector shape = {n, h, w, oh, ow, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, - 2.0f, 2.0f, 3.0f, 3.0f, 2.0f, 2.0f, 3.0f, 3.0f}; - RunTestCaseResize(shape, input_data.data(), output_data.data(), false, schema::ResizeMethod_NEAREST, align_corners); + std::vector input_shape = {1, 2, 2, 1}; + std::vector output_shape = {1, oh, ow, 1}; + float input_data[] = {0, 1, 2, 3}; + float output_data[] = {0, 0.5, 1, 1, 1.5, 2, 2, 2.5, 3}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(method, oh, ow, align_corners); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestResizeOpenCL, ResizeNearestNeighborFp16) { - int n = 1; - int h = 2; - int w = 2; +TEST_F(TestOpenCL_Resize, NEAREST) { + schema::ResizeMethod method = schema::ResizeMethod_NEAREST; int oh = 4; int ow = 4; - int c = 1; bool align_corners = false; - std::vector shape = {n, h, w, oh, ow, c}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f, 1.0f, 1.0f, - 2.0f, 2.0f, 3.0f, 3.0f, 2.0f, 2.0f, 3.0f, 3.0f}; - RunTestCaseResize(shape, input_data.data(), output_data.data(), true, schema::ResizeMethod_NEAREST, align_corners); + + std::vector input_shape = {1, 2, 2, 1}; + std::vector output_shape = {1, oh, ow, 1}; + float input_data[] = {0, 1, 2, 3}; + float output_data[] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(method, oh, ow, align_corners); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc 
b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc index 1ce7d6e870..aeb9ef7b68 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/scale_tests.cc @@ -13,171 +13,75 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/scale.h" -namespace mindspore { -class TestScaleOpenCL : public mindspore::CommonTest { - public: - TestScaleOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseScale(void *input_data0, const std::vector &input_shape, void *scale_data, void *offset_data, - const std::vector &weight_shape, void *output_data, const std::vector &out_shape, - bool enable_fp16, int axis, int act_type = schema::ActivationType_NO_ACTIVATION) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - auto param = static_cast(malloc(sizeof(ScaleParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - param->axis_ = axis; - param->activation_type_ = act_type; - auto tensor_x_ptr = - std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), input_shape); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - - auto tensor_scale_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), weight_shape, - schema::Format_NHWC, lite::Tensor::Category::CONST_TENSOR); - auto tensor_scale = tensor_scale_ptr.get(); - if (tensor_scale == nullptr) { - MS_LOG(ERROR) << "tensor_scale create error."; - return; - } - tensor_scale->set_data(scale_data); - auto tensor_offset_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), weight_shape, - schema::Format_NHWC, lite::Tensor::Category::CONST_TENSOR); - auto tensor_offset = tensor_offset_ptr.get(); - if (tensor_offset == nullptr) { - MS_LOG(ERROR) << "tensor_offset create error."; - return; - } - tensor_offset->set_data(offset_data); - auto tensor_out_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), out_shape); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x, tensor_scale, tensor_offset}; - std::vector outputs{tensor_out}; - auto op_kernel_ptr = - std::make_unique(reinterpret_cast(param), inputs, outputs); - auto op_kernel = op_kernel_ptr.release(); - if (op_kernel == nullptr) { - MS_LOG(ERROR) << "op_kernel create error."; - return; - } - op_kernel->Init(); - inputs[0]->MallocData(allocator); +class TestOpenCL_Scale : public CommonTest {}; - std::vector kernels{op_kernel}; - - std::vector inputs_g{tensor_x}; - auto pGraph_ptr = std::make_unique(inputs_g, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data0, tensor_x->ElementsNum() * dtype_size); - pGraph->Run(); - if 
(enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, tensor_out->ElementsNum(), static_cast(1e-5)); - } - - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } - MS_LOG(INFO) << "TestScale passed"; +namespace { +// PrimitiveType_Resize: src/ops/populate/scale_populate.cc +OpParameter *CreateParameter(int axis, int activation_type = schema::ActivationType_NO_ACTIVATION) { + auto *param = test::CreateParameter(schema::PrimitiveType_Scale); + param->axis_ = axis; + param->activation_type_ = activation_type; + return reinterpret_cast(param); } +} // namespace -TEST_F(TestScaleOpenCL, ScaleAxis3Fp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector weight_shape = {c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector scale_data = {1.0f, 2.0f, 3.0f}; - std::vector offset_data = {1.0f, 2.0f, 3.0f}; - std::vector output_data = {2.0f, 6.0f, 12.0f, 5.0f, 12.0f, 21.0f, 8.0f, 18.0f, 30.0f, 11.0f, 24.0f, 39.0f}; - RunTestCaseScale(input_data.data(), in_shape0, scale_data.data(), offset_data.data(), weight_shape, - output_data.data(), out_shape, false, 3); +TEST_F(TestOpenCL_Scale, Axis1) { + int axis = 1; + std::vector input_shape = {1, 2, 2, 3}; + std::vector weight_shape = {input_shape[axis]}; + std::vector output_shape = input_shape; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float scale_data[] = {1, 2}; + float offset_data[] = {1, 2}; + float output_data[] = {2, 3, 4, 5, 6, 7, 16, 18, 20, 22, 24, 26}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, scale_data, CONST_TENSOR}, + {weight_shape, 
offset_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestScaleOpenCL, ScaleAxis1Fp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector weight_shape = {h}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector scale_data = {1.0f, 2.0f}; - std::vector offset_data = {1.0f, 2.0f}; - std::vector output_data = {2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 16.0f, 18.0f, 20.0f, 22.0f, 24.0f, 26.0f}; - RunTestCaseScale(input_data.data(), in_shape0, scale_data.data(), offset_data.data(), weight_shape, - output_data.data(), out_shape, false, 1); +TEST_F(TestOpenCL_Scale, Axis3) { + int axis = 3; + std::vector input_shape = {1, 2, 2, 3}; + std::vector weight_shape = {input_shape[axis]}; + std::vector output_shape = input_shape; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float scale_data[] = {1, 2, 3}; + float offset_data[] = {1, 2, 3}; + float output_data[] = {2, 6, 12, 5, 12, 21, 8, 18, 30, 11, 24, 39}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, scale_data, CONST_TENSOR}, + {weight_shape, offset_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestScaleOpenCL, ScaleAxis3ReLU6Fp32) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector weight_shape = {c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector scale_data = {1.0f, 2.0f, -1.0f}; - std::vector offset_data = {1.0f, 2.0f, 3.0f}; - std::vector output_data = {2.0f, 6.0f, 0.0f, 5.0f, 6.0f, 0.0f, 6.0f, 6.0f, 0.0f, 6.0f, 6.0f, 0.0f}; - RunTestCaseScale(input_data.data(), in_shape0, scale_data.data(), 
offset_data.data(), weight_shape, - output_data.data(), out_shape, false, 3, schema::ActivationType_RELU6); +TEST_F(TestOpenCL_Scale, Axis3RELU6) { + int axis = 3; + std::vector input_shape = {1, 2, 2, 3}; + std::vector weight_shape = {input_shape[axis]}; + std::vector output_shape = input_shape; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + float scale_data[] = {1, 2, -1}; + float offset_data[] = {1, 2, 3}; + float output_data[] = {2, 6, 0, 5, 6, 0, 6, 6, 0, 6, 6, 0}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis, schema::ActivationType_RELU6); + TestMain({{input_shape, input_data, VAR}, + {weight_shape, scale_data, CONST_TENSOR}, + {weight_shape, offset_data, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestScaleOpenCL, ScaleAxis3Fp16) { - int n = 1; - int h = 2; - int w = 2; - int c = 3; - std::vector in_shape0 = {n, h, w, c}; - std::vector weight_shape = {c}; - std::vector out_shape = {n, h, w, c}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - std::vector scale_data = {1.0f, 2.0f, 3.0f}; - std::vector offset_data = {1.0f, 2.0f, 3.0f}; - std::vector output_data = {2.0f, 6.0f, 12.0f, 5.0f, 12.0f, 21.0f, 8.0f, 18.0f, 30.0f, 11.0f, 24.0f, 39.0f}; - RunTestCaseScale(input_data.data(), in_shape0, scale_data.data(), offset_data.data(), weight_shape, - output_data.data(), out_shape, true, 3); -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/shape_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/shape_tests.cc new file mode 100644 index 0000000000..f2fca060ab --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/shape_tests.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/shape.h" + +namespace mindspore::lite::opencl::test { + +class TestOpenCL_Shape : public CommonTest {}; + +namespace { +// PrimitiveType_Shape: src/ops/populate/shape_populate.cc +OpParameter *CreateParameter() { + auto *param = test::CreateParameter(schema::PrimitiveType_Shape); + return reinterpret_cast(param); +} +} // namespace + +TEST_F(TestOpenCL_Shape, test0) { + std::vector input_shape = {2, 4}; + std::vector output_shape = {2}; + float input_data[] = {-0.4045, -0.0924, -0.617, -0.10114, -0.9893, 0.3342, 2.445, -2.182}; + float output_data[] = {2, 4}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } +} + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc index 692ee9b073..d87cc1dbcd 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc @@ -13,21 +13,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "common/common_test.h" #include "nnacl/slice_parameter.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" -namespace mindspore { +namespace mindspore::lite::opencl::test { -class TestSliceOpenCL : public mindspore::CommonTest {}; +class TestOpenCL_Slice : public CommonTest {}; -OpParameter *GetSliceParameter(const std::vector &begin, const std::vector &size) { - auto param = static_cast(malloc(sizeof(SliceParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "SliceParameter create error."; - return nullptr; - } - param->op_parameter_.type_ = schema::PrimitiveType_Slice; +namespace { +// PrimitiveType_Slice: src/ops/populate/slice_populate.cc +OpParameter *CreateParameter(const std::vector &begin, const std::vector &size) { + auto *param = test::CreateParameter(schema::PrimitiveType_Slice); param->param_length_ = begin.size(); for (int i = 0; i < begin.size(); ++i) { param->begin_[i] = begin[i]; @@ -35,21 +31,22 @@ OpParameter *GetSliceParameter(const std::vector &begin, const std::vector< } return reinterpret_cast(param); } +} // namespace -TEST_F(TestSliceOpenCL, 4D) { +TEST_F(TestOpenCL_Slice, 4D) { float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, 0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498, -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366, -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237, 0.05787678}; - float expect_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, + float output_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, 0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151, -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237}; - auto 
param = GetSliceParameter({0, 0, 0, 2}, {1, 2, 2, 5}); - TestMain({{{1, 2, 2, 8}, input_data, Tensor::Category::VAR}}, {{1, 2, 2, 5}, expect_data}, param, false); + auto param = CreateParameter({0, 0, 0, 2}, {1, 2, 2, 5}); + TestMain({{{1, 2, 2, 8}, input_data, VAR}}, {{1, 2, 2, 5}, output_data}, param, false); } -TEST_F(TestSliceOpenCL, tflite_cpu) { +TEST_F(TestOpenCL_Slice, test0) { std::vector, std::vector, std::vector, std::vector, std::vector, std::vector>> cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {2}}, @@ -146,18 +143,16 @@ TEST_F(TestSliceOpenCL, tflite_cpu) { auto &input_shape = std::get<1>(case_); auto &output_shape = std::get<2>(case_); auto &input_data = std::get<3>(case_); - auto &expect_data = std::get<4>(case_); + auto &output_data = std::get<4>(case_); auto &begin = std::get<5>(case_); auto &size = std::get<6>(case_); std::cout << name << std::endl; - auto *param = GetSliceParameter(begin, size); - TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, - false); - param = GetSliceParameter(begin, size); - TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, - true); + auto *param = CreateParameter(begin, size); + TestMain({{input_shape, input_data.data(), VAR}}, {output_shape, output_data.data()}, param, false); + param = CreateParameter(begin, size); + TestMain({{input_shape, input_data.data(), VAR}}, {output_shape, output_data.data()}, param, true); } } // namespace mindspore -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc index 893da87506..b696111e3b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/softmax_tests.cc @@ -13,157 +13,62 @@ * See the License for the specific 
language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/softmax_parameter.h" -namespace mindspore { -class TestSoftmaxOpenCL : public mindspore::CommonTest { - public: - TestSoftmaxOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseSoftmax(const std::vector &shape, void *input_data, void *output_data, bool enable_fp16, - int axis) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - int n, h, w, c; - bool is_2d = false; - if (shape.size() == 2) { - is_2d = true; - h = w = 1; - n = shape[0]; - c = shape[1]; - } else { - n = shape[0]; - h = shape[1]; - w = shape[2]; - c = shape[3]; - } - std::vector input_shape = {n, h, w, c}; - if (is_2d) { - input_shape = {n, c}; - } - auto input_format = is_2d ? schema::Format_NC : schema::Format_NHWC; - auto input_dtype = enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32; - auto tensor_x_ptr = std::make_unique(TypeId(input_dtype), input_shape, input_format); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - auto tensor_out_ptr = std::make_unique(TypeId(input_dtype), input_shape, input_format); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto opParameter = static_cast(malloc(sizeof(SoftmaxParameter))); - if (opParameter == nullptr) { - MS_LOG(ERROR) << "opParameter create error."; - return; - } - opParameter->axis_ = axis; - auto arith_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(opParameter), nullptr, kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } - - inputs[0]->MallocData(allocator); - - std::vector kernels{arith_kernel}; - auto pGraph_ptr = std::make_unique(inputs, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); - pGraph->Run(); - - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); - } - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } +class TestOpenCL_SoftMax : public CommonTest {}; - MS_LOG(INFO) << "Test Softmax passed"; +namespace { +// PrimitiveType_SoftMax: src/ops/populate/softmax_populate.cc +OpParameter *CreateParameter(int axis) { + auto *param = 
test::CreateParameter(schema::PrimitiveType_SoftMax); + param->axis_ = axis; + return reinterpret_cast(param); } - -TEST_F(TestSoftmaxOpenCL, Softmax2DFp32) { - int n = 1; - int c = 10; - std::vector shape = {n, c}; - std::vector input_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector output_data = {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f}; - - RunTestCaseSoftmax(shape, input_data.data(), output_data.data(), false, 1); -} - -TEST_F(TestSoftmaxOpenCL, Softmax2DFp16) { - int n = 1; - int c = 10; - std::vector shape = {n, c}; - std::vector input_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector output_data = {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f}; - - RunTestCaseSoftmax(shape, input_data.data(), output_data.data(), true, 1); +} // namespace + +TEST_F(TestOpenCL_SoftMax, 2D_axis1) { + int axis = 1; + std::vector input_shape = {1, 10}; + std::vector output_shape = input_shape; + float input_data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float output_data[] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable, + fp16_enable ? 
2e-2 : 1e-5); + } } -TEST_F(TestSoftmaxOpenCL, Softmax4DFp32) { - int n = 1; - int h = 2; - int w = 1; - int c = 5; - std::vector shape = {n, h, w, c}; - std::vector input_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector output_data = {0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f}; - - RunTestCaseSoftmax(shape, input_data.data(), output_data.data(), false, 3); +TEST_F(TestOpenCL_SoftMax, 4D_axis3) { + int axis = 3; + std::vector input_shape = {1, 2, 1, 5}; + std::vector output_shape = input_shape; + float input_data[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + float output_data[] = {0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable, + fp16_enable ? 2e-2 : 1e-5); + } } -TEST_F(TestSoftmaxOpenCL, Softmax4DFp16) { - int n = 1; - int h = 2; - int w = 1; - int c = 5; - std::vector shape = {n, h, w, c}; - std::vector input_data = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - std::vector output_data = {0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f, 0.2f}; - - RunTestCaseSoftmax(shape, input_data.data(), output_data.data(), true, 3); +TEST_F(TestOpenCL_SoftMax, 4D_axis1) { + int axis = 1; + std::vector input_shape = {1, 2, 1, 1}; + std::vector output_shape = input_shape; + float input_data[] = {1, 1}; + float output_data[] = {0.5, 0.5}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(axis); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable, + fp16_enable ? 
2e-2 : 1e-5); + } } -TEST_F(TestSoftmaxOpenCL, Softmax4DAxis1Fp32) { - int n = 1; - int h = 2; - int w = 1; - int c = 1; - std::vector shape = {n, h, w, c}; - std::vector input_data = {1.0f, 1.0f}; - std::vector output_data = {0.5f, 0.5f}; - - RunTestCaseSoftmax(shape, input_data.data(), output_data.data(), false, 1); -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_batch_nd_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_batch_nd_tests.cc index 593e88ef10..c8c127837d 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_batch_nd_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_batch_nd_tests.cc @@ -13,100 +13,43 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/fp32/space_to_batch_fp32.h" -namespace mindspore { -class TestSpaceToBatchNDOpenCL : public mindspore::CommonTest { - public: - TestSpaceToBatchNDOpenCL() {} -}; -template -void test_main_space_to_batch_nd(void *input_data, void *correct_data, const std::vector &input_shape, - SpaceToBatchParameter *param, TypeId data_type, schema::Format format) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime_wrap = lite::opencl::OpenCLRuntimeWrapper(); - auto ocl_runtime = ocl_runtime_wrap.GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); +namespace mindspore::lite::opencl::test { - std::vector output_shape = input_shape; - output_shape[0] = 
input_shape[0] * param->block_sizes_[0] * param->block_sizes_[1]; - output_shape[1] = (input_shape[1] + param->paddings_[0] + param->paddings_[1]) / param->block_sizes_[0]; - output_shape[2] = (input_shape[2] + +param->paddings_[2] + param->paddings_[3]) / param->block_sizes_[1]; - - auto tensor_a = lite::Tensor(TypeId(data_type), input_shape, format); - auto tensor_c = lite::Tensor(TypeId(data_type), output_shape, format); - std::vector inputs{&tensor_a}; - std::vector outputs{&tensor_c}; - size_t input_size = tensor_a.Size(); - - auto *pkernel = - new (std::nothrow) kernel::SpaceToBatchNDOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (pkernel == nullptr) { - MS_LOG(INFO) << "new SpaceToBatchNDOpenCLKernel failed "; - return; - } - pkernel->Init(); +class TestOpenCL_SpaceToBatch : public CommonTest {}; - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); +namespace { +// PrimitiveType_SpaceToBatchND: src/ops/populate/space_to_batch_nd_populate.cc +OpParameter *CreateParameter(const std::vector &block_sizes, const std::vector &paddings) { + auto *param = test::CreateParameter(schema::PrimitiveType_SpaceToBatchND); + EXPECT_LE(block_sizes.size(), 4); + EXPECT_LE(paddings.size(), 4); + for (int i = 0; i < block_sizes.size(); ++i) { + param->block_sizes_[i] = block_sizes[i]; } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{pkernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - delete pkernel; - MS_LOG(INFO) << " new SubGraphOpenCLKernel failed "; - return; + for (int i = 0; i < paddings.size(); ++i) { + param->paddings_[i] = paddings[i]; } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - T *input_ptr = reinterpret_cast(inputs[0]->MutableData()); - memcpy(input_ptr, input_data, input_size); - std::cout << "==================input data================" 
<< std::endl; - for (auto i = 0; i < inputs[0]->ElementsNum(); ++i) { - std::cout << input_ptr[i] << ", "; - } - std::cout << std::endl; - - sub_graph->Run(); + return reinterpret_cast(param); +} - auto *output_data = reinterpret_cast(outputs[0]->MutableData()); - std::cout << "==================output data================" << std::endl; - for (auto i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << output_data[i] << ", "; - } - std::cout << std::endl; - std::cout << "==================correct data================" << std::endl; - for (auto i = 0; i < outputs[0]->ElementsNum(); ++i) { - std::cout << static_cast(correct_data)[i] << ", "; - } - std::cout << std::endl; - CommonTest::CompareOutputData(output_data, static_cast(correct_data), outputs[0]->ElementsNum(), 0.0001); - delete sub_graph; +std::vector InferShape(const std::vector &input_shape, const std::vector &block_sizes, + const std::vector &paddings) { + std::vector output_shape = input_shape; + output_shape[0] = input_shape[0] * block_sizes[0] * block_sizes[1]; + output_shape[1] = (input_shape[1] + paddings[0] + paddings[1]) / block_sizes[0]; + output_shape[2] = (input_shape[2] + +paddings[2] + paddings[3]) / block_sizes[1]; + return output_shape; } -TEST_F(TestSpaceToBatchNDOpenCL, NHWC4H2W2Pad2222) { +} // namespace + +TEST_F(TestOpenCL_SpaceToBatch, H2W2Pad2222) { std::vector input_shape{1, 6, 6, 4}; - SpaceToBatchParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; - } - param->block_sizes_[0] = 2; - param->block_sizes_[1] = 2; - param->paddings_[0] = 2; - param->paddings_[1] = 2; - param->paddings_[2] = 2; - param->paddings_[3] = 2; + std::vector block_sizes = {2, 2}; + std::vector paddings = {2, 2, 2, 2}; + auto output_shape = InferShape(input_shape, block_sizes, paddings); float input_data[] = {172, 47, 117, 192, 67, 251, 195, 103, 9, 211, 21, 242, 36, 87, 70, 216, 88, 140, 58, 193, 230, 39, 87, 174, 88, 81, 165, 25, 77, 72, 9, 148, 115, 208, 243, 197, 254, 
79, 175, 192, 82, 99, 216, 177, 243, 29, 147, 147, 142, 167, 32, 193, 9, 185, @@ -115,7 +58,7 @@ TEST_F(TestSpaceToBatchNDOpenCL, NHWC4H2W2Pad2222) { 119, 11, 174, 82, 91, 128, 142, 99, 53, 140, 121, 170, 84, 203, 68, 6, 196, 47, 127, 244, 131, 204, 100, 180, 232, 78, 143, 148, 227, 186, 23, 207, 141, 117, 85, 48, 49, 69, 169, 163, 192, 95, 197, 94, 0, 113, 178, 36, 162, 48, 93, 131, 98, 42}; - float correct_data[] = { + float output_data[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 172, 47, 117, 192, 9, 211, 21, 242, 88, 140, 58, 193, 0, 0, 0, 0, 0, 0, 0, 0, 142, 167, 32, 193, 31, 202, 244, 151, 183, 28, 34, 128, 0, 0, 0, 0, 0, 0, 0, 0, 142, 99, 53, 140, 68, @@ -134,51 +77,10 @@ TEST_F(TestSpaceToBatchNDOpenCL, NHWC4H2W2Pad2222) { 132, 105, 42, 65, 231, 169, 57, 174, 82, 91, 128, 0, 0, 0, 0, 0, 0, 0, 0, 85, 48, 49, 69, 197, 94, 0, 113, 93, 131, 98, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NHWC; - test_main_space_to_batch_nd(input_data, correct_data, input_shape, param, data_type, format); -} -TEST_F(TestSpaceToBatchNDOpenCL, NC4HW4H2W2Pad2222) { - std::vector input_shape{1, 6, 6, 4}; - SpaceToBatchParameter *param = std::make_unique().release(); - if (param == nullptr) { - return; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_sizes, paddings); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - param->block_sizes_[0] = 2; - param->block_sizes_[1] = 2; - param->paddings_[0] = 2; - param->paddings_[1] = 2; - param->paddings_[2] = 2; - param->paddings_[3] = 2; - float input_data[] = {172, 47, 117, 192, 67, 251, 195, 103, 9, 211, 21, 242, 36, 87, 70, 216, 88, 140, - 58, 193, 230, 39, 87, 174, 88, 81, 165, 25, 77, 72, 9, 148, 115, 208, 243, 197, - 254, 79, 175, 192, 82, 99, 216, 177, 243, 29, 147, 147, 142, 167, 32, 193, 9, 185, - 127, 
32, 31, 202, 244, 151, 163, 254, 203, 114, 183, 28, 34, 128, 128, 164, 53, 133, - 38, 232, 244, 17, 79, 132, 105, 42, 186, 31, 120, 1, 65, 231, 169, 57, 35, 102, - 119, 11, 174, 82, 91, 128, 142, 99, 53, 140, 121, 170, 84, 203, 68, 6, 196, 47, - 127, 244, 131, 204, 100, 180, 232, 78, 143, 148, 227, 186, 23, 207, 141, 117, 85, 48, - 49, 69, 169, 163, 192, 95, 197, 94, 0, 113, 178, 36, 162, 48, 93, 131, 98, 42}; - float correct_data[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 172, 47, 117, 192, 9, 211, 21, 242, 88, 140, 58, 193, 0, 0, 0, 0, 0, 0, 0, 0, 142, 167, - 32, 193, 31, 202, 244, 151, 183, 28, 34, 128, 0, 0, 0, 0, 0, 0, 0, 0, 142, 99, 53, 140, 68, - 6, 196, 47, 100, 180, 232, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 251, 195, 103, 36, 87, 70, 216, 230, 39, 87, 174, 0, 0, - 0, 0, 0, 0, 0, 0, 9, 185, 127, 32, 163, 254, 203, 114, 128, 164, 53, 133, 0, 0, 0, 0, 0, - 0, 0, 0, 121, 170, 84, 203, 127, 244, 131, 204, 143, 148, 227, 186, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 81, 165, 25, 115, 208, - 243, 197, 82, 99, 216, 177, 0, 0, 0, 0, 0, 0, 0, 0, 38, 232, 244, 17, 186, 31, 120, 1, 35, - 102, 119, 11, 0, 0, 0, 0, 0, 0, 0, 0, 23, 207, 141, 117, 169, 163, 192, 95, 178, 36, 162, 48, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 77, 72, 9, 148, 254, 79, 175, 192, 243, 29, 147, 147, 0, 0, 0, 0, 0, 0, 0, 0, 79, - 132, 105, 42, 65, 231, 169, 57, 174, 82, 91, 128, 0, 0, 0, 0, 0, 0, 0, 0, 85, 48, 49, 69, - 197, 94, 0, 113, 93, 131, 98, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0}; - TypeId data_type = kNumberTypeFloat32; - schema::Format format = schema::Format_NCHW; - 
test_main_space_to_batch_nd(input_data, correct_data, input_shape, param, data_type, format); } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc index 83ba480679..836abfed04 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/space_to_depth_tests.cc @@ -13,256 +13,154 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/fp32/space_to_depth_fp32.h" -namespace mindspore { -class TestSpaceToDepthOpenCL : public mindspore::CommonTest { - public: - TestSpaceToDepthOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestCaseSpaceToDepth(const std::vector &shape_in, const std::vector &shape_out, void *input_data, - void *output_data, bool enable_fp16, int block_size) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? 
sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto allocator = ocl_runtime->GetAllocator(); - auto param = static_cast(malloc(sizeof(SpaceToDepthParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - param->block_size_ = block_size; - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - shape_in, schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - auto tensor_out_ptr = - std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), shape_out); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto arith_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } - - inputs[0]->MallocData(allocator); - - std::vector kernels{arith_kernel}; - auto pGraph_ptr = std::make_unique(inputs, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; - } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, inputs[0]->ElementsNum() * dtype_size); - pGraph->Run(); - - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-3), - 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, outputs[0]->ElementsNum(), static_cast(1e-5)); - } - for (auto t : inputs) { - t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); - } - - MS_LOG(INFO) << "Test SpaceToDepth passed"; -} - -TEST_F(TestSpaceToDepthOpenCL, AlignTest1Fp32) { - 
std::vector shape_in = {1, 2, 2, 4}; - std::vector shape_out = {1, 1, 1, 16}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; - std::vector output_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; - - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), false, 2); -} - -TEST_F(TestSpaceToDepthOpenCL, AlignTest1Fp16) { - std::vector shape_in = {1, 2, 2, 4}; - std::vector shape_out = {1, 1, 1, 16}; - std::vector input_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; - std::vector output_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, - 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}; +class TestOpenCL_SpaceToDepth : public CommonTest {}; - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), true, 2); -} - -TEST_F(TestSpaceToDepthOpenCL, AlignTest2Fp32) { - std::vector shape_in = {1, 4, 4, 4}; - std::vector shape_out = {1, 2, 2, 16}; - std::vector input_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, - 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f}; - std::vector output_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 40.0f, 41.0f, 42.0f, 
43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f}; - - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), false, 2); +namespace { +// PrimitiveType_SpaceToDepth: src/ops/populate/space_to_depth_populate.cc +OpParameter *CreateParameter(int block_size) { + auto *param = test::CreateParameter(schema::PrimitiveType_SpaceToDepth); + param->block_size_ = block_size; + return reinterpret_cast(param); } - -TEST_F(TestSpaceToDepthOpenCL, AlignTest2Fp16) { - std::vector shape_in = {1, 4, 4, 4}; - std::vector shape_out = {1, 2, 2, 16}; - std::vector input_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, - 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f}; - std::vector output_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, - 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f}; - - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), true, 2); +} // namespace + +TEST_F(TestOpenCL_SpaceToDepth, AlignTest1) { + int block_size = 2; + std::vector input_shape = {1, 2, 2, 4}; + std::vector output_shape = {1, 1, 1, 16}; + float input_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + float output_data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + + for 
(auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestSpaceToDepthOpenCL, AlignTest3Fp32) { - std::vector shape_in = {1, 6, 6, 4}; - std::vector shape_out = {1, 2, 2, 36}; - std::vector input_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, - 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, - 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, - 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, - 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 96.0f, 97.0f, - 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, - 112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, - 126.0f, 127.0f, 128.0f, 129.0f, 130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, - 140.0f, 141.0f, 142.0f, 143.0f}; - std::vector output_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 24.0f, 25.0f, - 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 48.0f, 49.0f, 50.0f, 51.0f, - 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, - 44.0f, 45.0f, 46.0f, 47.0f, 60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, - 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 
80.0f, 81.0f, 82.0f, 83.0f, - 96.0f, 97.0f, 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 120.0f, 121.0f, - 122.0f, 123.0f, 124.0f, 125.0f, 126.0f, 127.0f, 128.0f, 129.0f, 130.0f, 131.0f, 84.0f, 85.0f, 86.0f, 87.0f, - 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 108.0f, 109.0f, 110.0f, 111.0f, 112.0f, 113.0f, - 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, - 140.0f, 141.0f, 142.0f, 143.0f}; - - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), false, 3); +TEST_F(TestOpenCL_SpaceToDepth, AlignTest2) { + int block_size = 2; + std::vector input_shape = {1, 4, 4, 4}; + std::vector output_shape = {1, 2, 2, 16}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, + 14, 15, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 48, 49, 50, 51, + 52, 53, 54, 55, 40, 41, 42, 43, 44, 45, 46, 47, 56, 57, 58, 59, 60, 61, 62, 63}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestSpaceToDepthOpenCL, NotAlignTest1Fp32) { - std::vector shape_in = {1, 2, 2, 1}; - std::vector shape_out = {1, 1, 1, 4}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f}; - - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), false, 2); +TEST_F(TestOpenCL_SpaceToDepth, AlignTest3) { + int block_size = 3; + std::vector input_shape = {1, 6, 6, 4}; + std::vector output_shape = {1, 2, 2, 36}; + float 
input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 96, 97, 98, 99, 100, 101, + 102, 103, 104, 105, 106, 107, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 108, 109, 110, 111, 112, 113, + 114, 115, 116, 117, 118, 119, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestSpaceToDepthOpenCL, NotAlignTest1Fp16) { - std::vector shape_in = {1, 2, 2, 1}; - std::vector shape_out = {1, 1, 1, 4}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f}; +TEST_F(TestOpenCL_SpaceToDepth, NotAlignTest1) { + int block_size = 2; + std::vector input_shape = {1, 2, 2, 1}; + std::vector output_shape = {1, 1, 1, 4}; + float input_data[] = {0, 1, 2, 3}; + float output_data[] = {0, 1, 2, 3}; - RunTestCaseSpaceToDepth(shape_in, shape_out, 
input_data.data(), output_data.data(), true, 2); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestSpaceToDepthOpenCL, NotAlignTest2Fp32) { - std::vector shape_in = {1, 2, 2, 3}; - std::vector shape_out = {1, 1, 1, 12}; - std::vector input_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, +TEST_F(TestOpenCL_SpaceToDepth, NotAlignTest2) { + int block_size = 2; + std::vector input_shape = {1, 2, 2, 3}; + std::vector output_shape = {1, 1, 1, 12}; + float input_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, }; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; + float output_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), false, 2); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestSpaceToDepthOpenCL, NotAlignTest3Fp32) { - std::vector shape_in = {1, 4, 4, 3}; - std::vector shape_out = {1, 2, 2, 12}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, - 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, - 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f}; - std::vector output_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, - 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, - 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 42.0f, 43.0f, 44.0f, 45.0f, 
46.0f, 47.0f}; - - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), false, 2); +TEST_F(TestOpenCL_SpaceToDepth, NotAlignTest3) { + int block_size = 2; + std::vector input_shape = {1, 4, 4, 3}; + std::vector output_shape = {1, 2, 2, 12}; + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47}; + float output_data[] = {0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17, 6, 7, 8, 9, + 10, 11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 36, 37, + 38, 39, 40, 41, 30, 31, 32, 33, 34, 35, 42, 43, 44, 45, 46, 47}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestSpaceToDepthOpenCL, NotAlignTest4Fp32) { - std::vector shape_in = {1, 6, 6, 6}; - std::vector shape_out = {1, 2, 2, 54}; - std::vector input_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, - 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, - 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, - 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, - 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, - 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 96.0f, 97.0f, - 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, - 112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, - 126.0f, 127.0f, 128.0f, 129.0f, 
130.0f, 131.0f, 132.0f, 133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, - 140.0f, 141.0f, 142.0f, 143.0f, 144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, - 154.0f, 155.0f, 156.0f, 157.0f, 158.0f, 159.0f, 160.0f, 161.0f, 162.0f, 163.0f, 164.0f, 165.0f, 166.0f, 167.0f, - 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, 176.0f, 177.0f, 178.0f, 179.0f, 180.0f, 181.0f, - 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, 190.0f, 191.0f, 192.0f, 193.0f, 194.0f, 195.0f, - 196.0f, 197.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, 208.0f, 209.0f, - 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f}; - std::vector output_data = { - 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, - 14.0f, 15.0f, 16.0f, 17.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, - 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, - 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 18.0f, 19.0f, - 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, - 34.0f, 35.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 96.0f, 97.0f, - 98.0f, 99.0f, 100.0f, 101.0f, 102.0f, 103.0f, 104.0f, 105.0f, 106.0f, 107.0f, 108.0f, 109.0f, 110.0f, 111.0f, - 112.0f, 113.0f, 114.0f, 115.0f, 116.0f, 117.0f, 118.0f, 119.0f, 120.0f, 121.0f, 122.0f, 123.0f, 124.0f, 125.0f, - 144.0f, 145.0f, 146.0f, 147.0f, 148.0f, 149.0f, 150.0f, 151.0f, 152.0f, 153.0f, 154.0f, 155.0f, 156.0f, 157.0f, - 158.0f, 159.0f, 160.0f, 161.0f, 180.0f, 181.0f, 182.0f, 183.0f, 184.0f, 185.0f, 186.0f, 187.0f, 188.0f, 189.0f, - 190.0f, 191.0f, 192.0f, 193.0f, 194.0f, 195.0f, 196.0f, 197.0f, 126.0f, 127.0f, 128.0f, 129.0f, 130.0f, 131.0f, - 132.0f, 
133.0f, 134.0f, 135.0f, 136.0f, 137.0f, 138.0f, 139.0f, 140.0f, 141.0f, 142.0f, 143.0f, 162.0f, 163.0f, - 164.0f, 165.0f, 166.0f, 167.0f, 168.0f, 169.0f, 170.0f, 171.0f, 172.0f, 173.0f, 174.0f, 175.0f, 176.0f, 177.0f, - 178.0f, 179.0f, 198.0f, 199.0f, 200.0f, 201.0f, 202.0f, 203.0f, 204.0f, 205.0f, 206.0f, 207.0f, 208.0f, 209.0f, - 210.0f, 211.0f, 212.0f, 213.0f, 214.0f, 215.0f}; - - RunTestCaseSpaceToDepth(shape_in, shape_out, input_data.data(), output_data.data(), false, 3); +TEST_F(TestOpenCL_SpaceToDepth, NotAlignTest4) { + int block_size = 3; + std::vector input_shape = {1, 6, 6, 6}; + std::vector output_shape = {1, 2, 2, 54}; + float input_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, + 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215}; + float output_data[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 54, 55, 56, 57, 58, 59, 60, 61, 
62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(block_size); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -} // namespace mindspore + +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/sparse_to_dense_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/sparse_to_dense_tests.cc index 6b1c1b1fe1..ea8fc94e6b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/sparse_to_dense_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/sparse_to_dense_tests.cc @@ -1,11 +1,11 @@ /** * Copyright 2020 Huawei Technologies Co., Ltd * - * Licensed under the Apache License, Version 2.0 (the "License"); + * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -13,519 +13,175 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h" -using mindspore::lite::Tensor; -using mindspore::schema::Format::Format_NHWC; -namespace mindspore { -class TestSparseToDenseOpenCLCI : public mindspore::CommonTest { - public: - TestSparseToDenseOpenCLCI() {} -}; +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/sparse_to_dense_parameter.h" -TEST_F(TestSparseToDenseOpenCLCI, Fp32Dim2Shape3Vector) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); +namespace mindspore::lite::opencl::test { - MS_LOG(INFO) << " init tensors "; - std::vector input_shape1 = {6, 3}; - std::vector input_shape2 = {3}; - std::vector input_shape3 = {6}; - std::vector input_shape4 = {1}; - float input_data1[] = {0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, 5, 0, 0, 6}; - float input_data2[] = {6, 1, 10}; - float input_data3[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - float input_data4[] = {0.0}; - float correctOutput[] = {1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - auto data_type = kNumberTypeFloat32; - std::vector output_shape = {6, 1, 10}; - auto in_tensor1 = Tensor(data_type, input_shape1, Format_NHWC, lite::Tensor::VAR); - auto in_tensor2 = Tensor(data_type, input_shape2, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto in_tensor3 = Tensor(data_type, input_shape3, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto in_tensor4 = Tensor(data_type, input_shape4, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto output_tensor = 
Tensor(data_type, output_shape, Format_NHWC, lite::Tensor::VAR); - // allocate memory for weights - in_tensor2.MallocData(); - in_tensor3.MallocData(); - in_tensor4.MallocData(); - std::vector inputs{&in_tensor1, &in_tensor2, &in_tensor3, &in_tensor4}; - std::vector outputs{&output_tensor}; - // initialize weights - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data3)); - memcpy(inputs[3]->data_c(), input_data4, sizeof(input_data4)); - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(SparseToDenseParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; - } - - auto *sparse_to_dense_kernel = - new (std::nothrow) kernel::SparseToDenseOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (sparse_to_dense_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::SparseToDenseOpenCLKernel failed "; - delete param; - return; - } - sparse_to_dense_kernel->Init(); - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{sparse_to_dense_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - delete param; - delete sparse_to_dense_kernel; - return; - } - // to do allocate memory for inputs - in_tensor1.MallocData(allocator); - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); +class TestOpenCL_SparseToDense : public CommonTest {}; - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; +namespace { +// PrimitiveType_SparseToDense: 
src/ops/populate/sparse_to_dense_populate.cc +OpParameter *CreateParameter() { + auto *param = test::CreateParameter(schema::PrimitiveType_SparseToDense); + return reinterpret_cast(param); } +} // namespace -TEST_F(TestSparseToDenseOpenCLCI, Fp32Dim2Scalar) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); - - MS_LOG(INFO) << " init tensors "; - std::vector input_shape1 = {6, 2}; - std::vector input_shape2 = {2}; +TEST_F(TestOpenCL_SparseToDense, Dim2Shape3Vector) { + std::vector input_shape0 = {6, 3}; + std::vector input_shape1 = {3}; + std::vector input_shape2 = {6}; std::vector input_shape3 = {1}; - std::vector input_shape4 = {1}; - float input_data1[] = {0, 0, 1, 2, 2, 3, 3, 6, 4, 7, 5, 9}; - float input_data2[] = {6, 10}; - float input_data3[] = {6.0}; - float input_data4[] = {0.0}; - float correctOutput[] = {6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6}; - auto data_type = kNumberTypeFloat32; - std::vector output_shape = {6, 10}; - auto in_tensor1 = Tensor(data_type, input_shape1, Format_NHWC, lite::Tensor::VAR); - auto in_tensor2 = Tensor(data_type, input_shape2, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto in_tensor3 = Tensor(data_type, input_shape3, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto in_tensor4 = Tensor(data_type, input_shape4, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto output_tensor = Tensor(data_type, output_shape, Format_NHWC, lite::Tensor::VAR); - // allocate memory for weights - in_tensor2.MallocData(); - in_tensor3.MallocData(); - in_tensor4.MallocData(); - std::vector inputs{&in_tensor1, &in_tensor2, &in_tensor3, &in_tensor4}; - std::vector outputs{&output_tensor}; - // initialize weights - memcpy(inputs[1]->data_c(), input_data2, 
sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data3)); - memcpy(inputs[3]->data_c(), input_data4, sizeof(input_data4)); - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(SparseToDenseParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; - } - - auto *sparse_to_dense_kernel = - new (std::nothrow) kernel::SparseToDenseOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (sparse_to_dense_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::SparseToDenseOpenCLKernel failed "; - delete param; - return; - } - sparse_to_dense_kernel->Init(); - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{sparse_to_dense_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - delete param; - delete sparse_to_dense_kernel; - return; + std::vector output_shape = {6, 1, 10}; + float input_data0[] = {0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 3, 0, 0, 4, 0, 0, 5, 0, 0, 6}; + float input_data1[] = {6, 1, 10}; + float input_data2[] = {1, 2, 3, 4, 5, 6}; + float input_data3[] = {0}; + float output_data[] = {1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(); + TestMain({{input_shape0, input_data0, VAR}, + {input_shape1, input_data1, CONST_TENSOR}, + {input_shape2, input_data2, CONST_TENSOR}, + {input_shape3, input_data3, CONST_SCALAR}}, + {output_shape, output_data}, param, fp16_enable); } - // to do allocate memory for inputs - in_tensor1.MallocData(allocator); - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - - 
std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; } -TEST_F(TestSparseToDenseOpenCLCI, Fp32Dim2Vector) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); - - MS_LOG(INFO) << " init tensors "; - std::vector input_shape1 = {6, 2}; - std::vector input_shape2 = {2}; - std::vector input_shape3 = {6}; - std::vector input_shape4 = {1}; - float input_data1[] = {0, 0, 1, 2, 2, 3, 3, 6, 4, 7, 5, 9}; - float input_data2[] = {6, 10}; - float input_data3[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - float input_data4[] = {0.0}; - float correctOutput[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6}; - auto data_type = kNumberTypeFloat32; +TEST_F(TestOpenCL_SparseToDense, Dim2Scalar) { + std::vector input_shape0 = {6, 2}; + std::vector input_shape1 = {2}; + std::vector input_shape2 = {1}; + std::vector input_shape3 = {1}; std::vector output_shape = {6, 10}; - auto in_tensor1 = Tensor(data_type, input_shape1, Format_NHWC, lite::Tensor::VAR); - auto in_tensor2 = Tensor(data_type, input_shape2, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto in_tensor3 = Tensor(data_type, input_shape3, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto in_tensor4 = Tensor(data_type, input_shape4, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto output_tensor = Tensor(data_type, output_shape, Format_NHWC, lite::Tensor::VAR); - // allocate memory for weights - in_tensor2.MallocData(); - in_tensor3.MallocData(); - in_tensor4.MallocData(); - std::vector inputs{&in_tensor1, &in_tensor2, 
&in_tensor3, &in_tensor4}; - std::vector outputs{&output_tensor}; - // initialize weights - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data3)); - memcpy(inputs[3]->data_c(), input_data4, sizeof(input_data4)); - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(SparseToDenseParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; + float input_data0[] = {0, 0, 1, 2, 2, 3, 3, 6, 4, 7, 5, 9}; + float input_data1[] = {6, 10}; + float input_data2[] = {6}; + float input_data3[] = {0}; + float output_data[] = {6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(); + TestMain({{input_shape0, input_data0, VAR}, + {input_shape1, input_data1, CONST_TENSOR}, + {input_shape2, input_data2, CONST_SCALAR}, + {input_shape3, input_data3, CONST_SCALAR}}, + {output_shape, output_data}, param, fp16_enable); } +} - auto *sparse_to_dense_kernel = - new (std::nothrow) kernel::SparseToDenseOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (sparse_to_dense_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::SparseToDenseOpenCLKernel failed "; - delete param; - return; - } - sparse_to_dense_kernel->Init(); - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{sparse_to_dense_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - delete param; - delete sparse_to_dense_kernel; - return; +TEST_F(TestOpenCL_SparseToDense, Dim2Vector) { + std::vector input_shape0 = {6, 2}; + std::vector input_shape1 = {2}; + std::vector input_shape2 = {6}; + std::vector 
input_shape3 = {1}; + std::vector output_shape = {6, 10}; + float input_data0[] = {0, 0, 1, 2, 2, 3, 3, 6, 4, 7, 5, 9}; + float input_data1[] = {6, 10}; + float input_data2[] = {1, 2, 3, 4, 5, 6}; + float input_data3[] = {0}; + float output_data[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(); + TestMain({{input_shape0, input_data0, VAR}, + {input_shape1, input_data1, CONST_TENSOR}, + {input_shape2, input_data2, CONST_TENSOR}, + {input_shape3, input_data3, CONST_SCALAR}}, + {output_shape, output_data}, param, fp16_enable); } - // to do allocate memory for inputs - in_tensor1.MallocData(allocator); - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; } -TEST_F(TestSparseToDenseOpenCLCI, Fp32Dim2Shape1Vector) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); - - MS_LOG(INFO) << " init tensors "; - std::vector input_shape1 = {6, 1}; - std::vector input_shape2 = {1}; - std::vector input_shape3 = {6}; - std::vector input_shape4 = {1}; - float input_data1[] = {0, 2, 3, 6, 7, 9}; - float input_data2[] = {10}; - float input_data3[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - float input_data4[] = {0.0}; - float correctOutput[] = {1, 0, 2, 3, 0, 0, 4, 5, 0, 6}; - auto data_type = kNumberTypeFloat32; +TEST_F(TestOpenCL_SparseToDense, Dim2Shape1Vector) { + 
std::vector input_shape0 = {6, 1}; + std::vector input_shape1 = {1}; + std::vector input_shape2 = {6}; + std::vector input_shape3 = {1}; std::vector output_shape = {10}; - auto in_tensor1 = Tensor(data_type, input_shape1, Format_NHWC, lite::Tensor::VAR); - auto in_tensor2 = Tensor(data_type, input_shape2, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto in_tensor3 = Tensor(data_type, input_shape3, Format_NHWC, lite::Tensor::CONST_TENSOR); - auto in_tensor4 = Tensor(data_type, input_shape4, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto output_tensor = Tensor(data_type, output_shape, Format_NHWC, lite::Tensor::VAR); - // allocate memory for weights - in_tensor2.MallocData(); - in_tensor3.MallocData(); - in_tensor4.MallocData(); - std::vector inputs{&in_tensor1, &in_tensor2, &in_tensor3, &in_tensor4}; - std::vector outputs{&output_tensor}; - // initialize weights - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data3)); - memcpy(inputs[3]->data_c(), input_data4, sizeof(input_data4)); - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(SparseToDenseParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; - } - - auto *sparse_to_dense_kernel = - new (std::nothrow) kernel::SparseToDenseOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (sparse_to_dense_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::SparseToDenseOpenCLKernel failed "; - delete param; - return; + float input_data0[] = {0, 2, 3, 6, 7, 9}; + float input_data1[] = {10}; + float input_data2[] = {1, 2, 3, 4, 5, 6}; + float input_data3[] = {0}; + float output_data[] = {1, 0, 2, 3, 0, 0, 4, 5, 0, 6}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(); + TestMain({{input_shape0, input_data0, VAR}, + {input_shape1, input_data1, CONST_TENSOR}, + {input_shape2, input_data2, CONST_TENSOR}, + {input_shape3, input_data3, 
CONST_SCALAR}}, + {output_shape, output_data}, param, fp16_enable); } - sparse_to_dense_kernel->Init(); - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{sparse_to_dense_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - delete param; - delete sparse_to_dense_kernel; - return; - } - // to do allocate memory for inputs - in_tensor1.MallocData(allocator); - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; } -TEST_F(TestSparseToDenseOpenCLCI, Fp32Dim2Shape1Scalar) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); - - MS_LOG(INFO) << " init tensors "; - std::vector input_shape1 = {7, 1}; // shape[1] = 1 +TEST_F(TestOpenCL_SparseToDense, Dim2Shape1Scalar) { + std::vector input_shape0 = {7, 1}; + std::vector input_shape1 = {1}; std::vector input_shape2 = {1}; std::vector input_shape3 = {1}; - std::vector input_shape4 = {1}; - float input_data1[] = {0, 1, 2, 3, 4, 5, 9}; - float input_data2[] = {10}; - float input_data3[] = {6.0}; - float input_data4[] = {0.0}; - float correctOutput[] = {6, 6, 6, 6, 6, 6, 0, 0, 0, 6}; - auto data_type = kNumberTypeFloat32; std::vector output_shape = {10}; - auto in_tensor1 = Tensor(data_type, input_shape1, Format_NHWC, lite::Tensor::VAR); - auto in_tensor2 = Tensor(data_type, input_shape2, Format_NHWC, lite::Tensor::CONST_TENSOR); - 
auto in_tensor3 = Tensor(data_type, input_shape3, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto in_tensor4 = Tensor(data_type, input_shape4, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto output_tensor = Tensor(data_type, output_shape, Format_NHWC, lite::Tensor::VAR); - // allocate memory for weights - in_tensor2.MallocData(); - in_tensor3.MallocData(); - in_tensor4.MallocData(); - std::vector inputs{&in_tensor1, &in_tensor2, &in_tensor3, &in_tensor4}; - std::vector outputs{&output_tensor}; - // initialize weights - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data3)); - memcpy(inputs[3]->data_c(), input_data4, sizeof(input_data4)); - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(SparseToDenseParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; + float input_data0[] = {0, 1, 2, 3, 4, 5, 9}; + float input_data1[] = {10}; + float input_data2[] = {6}; + float input_data3[] = {0}; + float output_data[] = {6, 6, 6, 6, 6, 6, 0, 0, 0, 6}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(); + TestMain({{input_shape0, input_data0, VAR}, + {input_shape1, input_data1, CONST_TENSOR}, + {input_shape2, input_data2, CONST_SCALAR}, + {input_shape3, input_data3, CONST_SCALAR}}, + {output_shape, output_data}, param, fp16_enable); } - - auto *sparse_to_dense_kernel = - new (std::nothrow) kernel::SparseToDenseOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (sparse_to_dense_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::SparseToDenseOpenCLKernel failed "; - delete param; - return; - } - sparse_to_dense_kernel->Init(); - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{sparse_to_dense_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new 
kernel::SubGraphOpenCLKernel failed "; - delete param; - delete sparse_to_dense_kernel; - return; - } - // to do allocate memory for inputs - in_tensor1.MallocData(allocator); - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; } -TEST_F(TestSparseToDenseOpenCLCI, Fp32Dim1Scalar) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); - MS_LOG(INFO) << " init tensors "; - std::vector input_shape1 = {6}; +TEST_F(TestOpenCL_SparseToDense, Dim1Scalar) { + std::vector input_shape0 = {6}; + std::vector input_shape1 = {1}; std::vector input_shape2 = {1}; std::vector input_shape3 = {1}; - std::vector input_shape4 = {1}; - float input_data1[] = {1, 3, 4, 5, 6, 7}; - float input_data2[] = {10}; - float input_data3[] = {1.0}; - float input_data4[] = {2.0}; - float correctOutput[] = {2, 1, 2, 1, 1, 1, 1, 1, 2, 2}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; std::vector output_shape = {10}; - auto in_tensor1 = Tensor(data_type, input_shape1, Format_NHWC, tensor_type); - auto in_tensor2 = Tensor(data_type, input_shape2, Format_NHWC, tensor_type); - auto in_tensor3 = Tensor(data_type, input_shape3, Format_NHWC, lite::Tensor::CONST_SCALAR); - auto in_tensor4 = Tensor(data_type, input_shape4, Format_NHWC, tensor_type); - auto output_tensor = Tensor(data_type, output_shape, Format_NHWC, tensor_type); - // allocate memory for weights - in_tensor2.MallocData(); - in_tensor3.MallocData(); - in_tensor4.MallocData(); - 
std::vector inputs{&in_tensor1, &in_tensor2, &in_tensor3, &in_tensor4}; - std::vector outputs{&output_tensor}; - // initialize weights - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data3)); - memcpy(inputs[3]->data_c(), input_data4, sizeof(input_data4)); - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(SparseToDenseParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; + float input_data0[] = {1, 3, 4, 5, 6, 7}; + float input_data1[] = {10}; + float input_data2[] = {1}; + float input_data3[] = {2}; + float output_data[] = {2, 1, 2, 1, 1, 1, 1, 1, 2, 2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(); + TestMain({{input_shape0, input_data0, VAR}, + {input_shape1, input_data1, CONST_TENSOR}, + {input_shape2, input_data2, CONST_SCALAR}, + {input_shape3, input_data3, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); } - - auto *sparse_to_dense_kernel = - new (std::nothrow) kernel::SparseToDenseOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (sparse_to_dense_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::SparseToDenseOpenCLKernel failed "; - delete param; - return; - } - sparse_to_dense_kernel->Init(); - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{sparse_to_dense_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - delete param; - delete sparse_to_dense_kernel; - return; - } - // to do allocate memory for inputs - in_tensor1.MallocData(allocator); - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - - std::cout << "==================output data================" << std::endl; - 
sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; } -TEST_F(TestSparseToDenseOpenCLCI, Fp32Dim1Vector) { - MS_LOG(INFO) << " begin test "; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto runtime = runtime_wrapper.GetInstance(); - runtime->Init(); - auto allocator = runtime->GetAllocator(); - MS_LOG(INFO) << " init tensors "; - std::vector input_shape1 = {6}; - std::vector input_shape2 = {1}; - std::vector input_shape3 = {6}; - std::vector input_shape4 = {1}; - float input_data1[] = {1, 3, 4, 5, 6, 7}; - float input_data2[] = {10}; - float input_data3[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - float input_data4[] = {2.0}; - float correctOutput[] = {2, 1, 2, 2, 3, 4, 5, 6, 2, 2}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; +TEST_F(TestOpenCL_SparseToDense, Dim1Vector) { + std::vector input_shape0 = {6}; + std::vector input_shape1 = {1}; + std::vector input_shape2 = {6}; + std::vector input_shape3 = {1}; std::vector output_shape = {10}; - auto in_tensor1 = Tensor(data_type, input_shape1, Format_NHWC, tensor_type); - auto in_tensor2 = Tensor(data_type, input_shape2, Format_NHWC, tensor_type); - auto in_tensor3 = Tensor(data_type, input_shape3, Format_NHWC, tensor_type); - auto in_tensor4 = Tensor(data_type, input_shape4, Format_NHWC, tensor_type); - auto output_tensor = Tensor(data_type, output_shape, Format_NHWC, tensor_type); - // allocate memory for weights - in_tensor2.MallocData(); - in_tensor3.MallocData(); - in_tensor4.MallocData(); - std::vector inputs{&in_tensor1, &in_tensor2, &in_tensor3, &in_tensor4}; - std::vector outputs{&output_tensor}; - // initialize weights - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data3)); - memcpy(inputs[3]->data_c(), input_data4, 
sizeof(input_data4)); - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(SparseToDenseParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new ActivationParameter failed "; - return; + float input_data0[] = {1, 3, 4, 5, 6, 7}; + float input_data1[] = {10}; + float input_data2[] = {1, 2, 3, 4, 5, 6}; + float input_data3[] = {2}; + float output_data[] = {2, 1, 2, 2, 3, 4, 5, 6, 2, 2}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(); + TestMain({{input_shape0, input_data0, VAR}, + {input_shape1, input_data1, CONST_TENSOR}, + {input_shape2, input_data2, CONST_TENSOR}, + {input_shape3, input_data3, CONST_TENSOR}}, + {output_shape, output_data}, param, fp16_enable); } - - auto *sparse_to_dense_kernel = - new (std::nothrow) kernel::SparseToDenseOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (sparse_to_dense_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::SparseToDenseOpenCLKernel failed "; - delete param; - return; - } - sparse_to_dense_kernel->Init(); - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{sparse_to_dense_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel({&in_tensor1}, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - delete param; - delete sparse_to_dense_kernel; - return; - } - // to do allocate memory for inputs - in_tensor1.MallocData(allocator); - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor.data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor.ElementsNum(), 0.0001)); - delete sub_graph; } - -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/opencl/stack_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/stack_tests.cc index 9df29e386f..298cab43b2 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/stack_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/stack_tests.cc @@ -13,271 +13,51 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include -#include -#include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h" -namespace mindspore { -class TestStackOpenCLCI : public mindspore::CommonTest { - public: - TestStackOpenCLCI() {} -}; +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/stack_parameter.h" -class TestStackOpenCLfp16 : public mindspore::CommonTest { - public: - TestStackOpenCLfp16() {} -}; +namespace mindspore::lite::opencl::test { -TEST_F(TestStackOpenCLCI, StackFp32_8inputforCI) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); +class TestOpenCL_Stack : public CommonTest {}; - MS_LOG(INFO) << " init tensors "; - constexpr int INPUT_NUM = 8; - std::array, INPUT_NUM> input_shapes = { - std::vector{1, 1, 8}, std::vector{1, 1, 8}, std::vector{1, 1, 8}, std::vector{1, 1, 8}, - std::vector{1, 1, 8}, std::vector{1, 1, 8}, std::vector{1, 1, 8}, std::vector{1, 1, 8}}; - std::vector output_shape = {8, 1, 1, 8}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - float input_data1[] = {0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f}; - float input_data2[] = {0.5f, 0.6f, 0.74f, 0.23f, 0.46f, 0.69f, 0.13f, 0.47f}; - float input_data3[] = {0.31f, 0.63f, 
0.84f, 0.43f, 0.56f, 0.79f, 0.12f, 0.57f}; - float input_data4[] = {0.35f, 0.26f, 0.17f, 0.33f, 0.66f, 0.89f, 0.93f, 0.77f}; - float input_data5[] = {0.57f, 0.6f, 0.84f, 0.83f, 0.48f, 0.78f, 0.63f, 0.87f}; - float input_data6[] = {0.66f, 0.56f, 0.64f, 0.63f, 0.56f, 0.59f, 0.73f, 0.37f}; - float input_data7[] = {0.35f, 0.26f, 0.54f, 0.33f, 0.76f, 0.59f, 0.73f, 0.34f}; - float input_data8[] = {0.15f, 0.36f, 0.44f, 0.73f, 0.56f, 0.49f, 0.93f, 0.37f}; - float correctOutput[] = {0.75f, 0.06f, 0.74f, 0.30f, 0.9f, 0.59f, 0.03f, 0.37f, 0.5f, 0.6f, 0.74f, 0.23f, 0.46f, - 0.69f, 0.13f, 0.47f, 0.31f, 0.63f, 0.84f, 0.43f, 0.56f, 0.79f, 0.12f, 0.57f, 0.35f, 0.26f, - 0.17f, 0.33f, 0.66f, 0.89f, 0.93f, 0.77f, 0.57f, 0.6f, 0.84f, 0.83f, 0.48f, 0.78f, 0.63f, - 0.87f, 0.66f, 0.56f, 0.64f, 0.63f, 0.56f, 0.59f, 0.73f, 0.37f, 0.35f, 0.26f, 0.54f, 0.33f, - 0.76f, 0.59f, 0.73f, 0.34f, 0.15f, 0.36f, 0.44f, 0.73f, 0.56f, 0.49f, 0.93f, 0.37f}; - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " new output_tensor failed "; - return; - } - std::vector inputs; - std::vector outputs{output_tensor}; - for (auto &shape : input_shapes) { - auto input_temp = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - inputs.push_back(input_temp); - if (input_temp == nullptr) { - MS_LOG(INFO) << " new input_tensor failed "; - return; - } - } - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(StackParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new StackParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->axis_ = 0; - auto *stack_kernel = - new (std::nothrow) kernel::StackOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (stack_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::StackOpenCLKernel failed 
"; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - stack_kernel->Init(); - // to do allocate memory for inputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{stack_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete stack_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - memcpy(inputs[0]->data_c(), input_data1, sizeof(input_data1)); - memcpy(inputs[1]->data_c(), input_data2, sizeof(input_data2)); - memcpy(inputs[2]->data_c(), input_data3, sizeof(input_data1)); - memcpy(inputs[3]->data_c(), input_data4, sizeof(input_data2)); - memcpy(inputs[4]->data_c(), input_data5, sizeof(input_data1)); - memcpy(inputs[5]->data_c(), input_data6, sizeof(input_data2)); - memcpy(inputs[6]->data_c(), input_data7, sizeof(input_data1)); - memcpy(inputs[7]->data_c(), input_data8, sizeof(input_data2)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.00001)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; +namespace { +// PrimitiveType_Stack: src/ops/populate/stack_populate.cc +OpParameter *CreateParameter(int axis) { + auto *param = test::CreateParameter(schema::PrimitiveType_Stack); + param->axis_ = axis; + return 
reinterpret_cast(param); } +} // namespace -TEST_F(TestStackOpenCLfp16, StackFp32_8inputaxis1) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - // get the input from .bin - size_t input1_size, input2_size, input3_size, input4_size, input5_size, input6_size, input7_size, input8_size, - output_size; - std::string input1Ppath = "./test_data/stackfp16_input1.bin"; - std::string input2Ppath = "./test_data/stackfp16_input2.bin"; - std::string input3Ppath = "./test_data/stackfp16_input3.bin"; - std::string input4Ppath = "./test_data/stackfp16_input4.bin"; - std::string input5Ppath = "./test_data/stackfp16_input5.bin"; - std::string input6Ppath = "./test_data/stackfp16_input6.bin"; - std::string input7Ppath = "./test_data/stackfp16_input7.bin"; - std::string input8Ppath = "./test_data/stackfp16_input8.bin"; - std::string correctOutputPath = "./test_data/stackfp16_output.bin"; - auto input_data1 = reinterpret_cast(mindspore::lite::ReadFile(input1Ppath.c_str(), &input1_size)); - auto input_data2 = reinterpret_cast(mindspore::lite::ReadFile(input2Ppath.c_str(), &input2_size)); - auto input_data3 = reinterpret_cast(mindspore::lite::ReadFile(input3Ppath.c_str(), &input3_size)); - auto input_data4 = reinterpret_cast(mindspore::lite::ReadFile(input4Ppath.c_str(), &input4_size)); - auto input_data5 = reinterpret_cast(mindspore::lite::ReadFile(input5Ppath.c_str(), &input5_size)); - auto input_data6 = reinterpret_cast(mindspore::lite::ReadFile(input6Ppath.c_str(), &input6_size)); - auto input_data7 = reinterpret_cast(mindspore::lite::ReadFile(input7Ppath.c_str(), &input7_size)); - auto input_data8 = reinterpret_cast(mindspore::lite::ReadFile(input8Ppath.c_str(), &input8_size)); - auto correctOutput = - reinterpret_cast(mindspore::lite::ReadFile(correctOutputPath.c_str(), &output_size)); - MS_LOG(INFO) << " init 
tensors "; +TEST_F(TestOpenCL_Stack, input8_ndim3_axis0) { constexpr int INPUT_NUM = 8; - std::array, INPUT_NUM> input_shapes = { - std::vector{1, 17, 18}, std::vector{1, 17, 18}, std::vector{1, 17, 18}, std::vector{1, 17, 18}, - std::vector{1, 17, 18}, std::vector{1, 17, 18}, std::vector{1, 17, 18}, std::vector{1, 17, 18}}; - std::vector output_shape = {1, 8, 17, 18}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; - std::vector inputs; - for (auto &shape : input_shapes) { - auto input_temp = new (std::nothrow) lite::Tensor(data_type, shape, schema::Format_NHWC, tensor_type); - inputs.push_back(input_temp); - if (input_temp == nullptr) { - MS_LOG(INFO) << " new input_tensor failed "; - return; - } - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - MS_LOG(INFO) << " new output_tensor failed "; - for (auto tensor : inputs) { - delete tensor; - } - return; - } - std::vector outputs{output_tensor}; - MS_LOG(INFO) << " input_shapes size =: " << input_shapes.size(); - - MS_LOG(INFO) << " initialize tensors "; - auto param = reinterpret_cast(malloc(sizeof(StackParameter))); - if (param == nullptr) { - MS_LOG(INFO) << " new StackParameter failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - return; - } - param->axis_ = 1; - auto *stack_kernel = - new (std::nothrow) kernel::StackOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (stack_kernel == nullptr) { - MS_LOG(INFO) << " new kernel::StackOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - return; - } - stack_kernel->Init(); - // to allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - MS_LOG(INFO) << " initialize sub_graph "; - std::vector 
kernels{stack_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete stack_kernel; - return; - } - sub_graph->Init(); - MS_LOG(INFO) << " initialize input data "; - if (inputs.size() == 8) { - memcpy(inputs[0]->data_c(), input_data1, input1_size); - memcpy(inputs[1]->data_c(), input_data2, input2_size); - memcpy(inputs[2]->data_c(), input_data3, input3_size); - memcpy(inputs[3]->data_c(), input_data4, input4_size); - memcpy(inputs[4]->data_c(), input_data5, input5_size); - memcpy(inputs[5]->data_c(), input_data6, input6_size); - memcpy(inputs[6]->data_c(), input_data7, input7_size); - memcpy(inputs[7]->data_c(), input_data8, input8_size); - } else { - MS_LOG(ERROR) << " input size must be 2 or 3 or 4"; - } - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->MutableData()); - ASSERT_EQ(0, CompareOutputData(output_data_gpu, correctOutput, output_tensor->ElementsNum(), 0.000001)); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; + int axis = 0; + std::vector input_shapes[INPUT_NUM] = {{1, 1, 8}, {1, 1, 8}, {1, 1, 8}, {1, 1, 8}, + {1, 1, 8}, {1, 1, 8}, {1, 1, 8}, {1, 1, 8}}; + std::vector output_shape = {8, 1, 1, 8}; + float input_datas[INPUT_NUM][8] = { + {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37}, {0.5, 0.6, 0.74, 0.23, 0.46, 0.69, 0.13, 0.47}, + {0.31, 0.63, 0.84, 0.43, 0.56, 0.79, 0.12, 0.57}, {0.35, 0.26, 0.17, 0.33, 0.66, 0.89, 0.93, 0.77}, + {0.57, 0.6, 0.84, 0.83, 0.48, 0.78, 0.63, 0.87}, {0.66, 0.56, 0.64, 0.63, 0.56, 0.59, 0.73, 0.37}, + {0.35, 0.26, 0.54, 
0.33, 0.76, 0.59, 0.73, 0.34}, {0.15, 0.36, 0.44, 0.73, 0.56, 0.49, 0.93, 0.37}}; + float output_data[] = {0.75, 0.06, 0.74, 0.30, 0.9, 0.59, 0.03, 0.37, 0.5, 0.6, 0.74, 0.23, 0.46, + 0.69, 0.13, 0.47, 0.31, 0.63, 0.84, 0.43, 0.56, 0.79, 0.12, 0.57, 0.35, 0.26, + 0.17, 0.33, 0.66, 0.89, 0.93, 0.77, 0.57, 0.6, 0.84, 0.83, 0.48, 0.78, 0.63, + 0.87, 0.66, 0.56, 0.64, 0.63, 0.56, 0.59, 0.73, 0.37, 0.35, 0.26, 0.54, 0.33, + 0.76, 0.59, 0.73, 0.34, 0.15, 0.36, 0.44, 0.73, 0.56, 0.49, 0.93, 0.37}; + + for (auto fp16_enable : {false}) { + auto *param = CreateParameter(axis); + TestMain({{input_shapes[0], input_datas[0], VAR}, + {input_shapes[1], input_datas[1], VAR}, + {input_shapes[2], input_datas[2], VAR}, + {input_shapes[3], input_datas[3], VAR}, + {input_shapes[4], input_datas[4], VAR}, + {input_shapes[5], input_datas[5], VAR}, + {input_shapes[6], input_datas[6], VAR}, + {input_shapes[7], input_datas[7], VAR}}, + {output_shape, output_data}, param, fp16_enable); } - delete sub_graph; } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc index 4b4410841d..1415cf02a5 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc @@ -13,22 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "common/common_test.h" +#include "ut/src/runtime/kernel/opencl/common.h" #include "nnacl/strided_slice.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" -namespace mindspore { +namespace mindspore::lite::opencl::test { -class TestStridedSliceOpenCL : public mindspore::CommonTest {}; +class TestOpenCL_StridedSlice : public CommonTest {}; -OpParameter *GetStridedSliceParameter(const std::vector &begins, const std::vector &ends, - const std::vector &strides) { - auto param = static_cast(malloc(sizeof(StridedSliceParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "create StridedSliceParameter error."; - return nullptr; - } - param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; +namespace { +// PrimitiveType_StridedSlice: src/ops/populate/strided_slice_populate.cc +OpParameter *CreateParameter(const std::vector &begins, const std::vector &ends, + const std::vector &strides) { + auto *param = test::CreateParameter(schema::PrimitiveType_StridedSlice); param->num_axes_ = begins.size(); for (int i = 0; i < begins.size(); ++i) { param->begins_[i] = begins[i]; @@ -37,84 +33,109 @@ OpParameter *GetStridedSliceParameter(const std::vector &begins, const std: } return reinterpret_cast(param); } +} // namespace -TEST_F(TestStridedSliceOpenCL, 1D) { +TEST_F(TestOpenCL_StridedSlice, 1D) { float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - float expect_data[] = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33}; - auto *param = GetStridedSliceParameter({3}, {36}, {3}); - TestMain({{{36}, input_data, Tensor::Category::VAR}}, {{11}, expect_data}, param, false); + float output_data[] = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({3}, {36}, {3}); + TestMain({{{36}, input_data, VAR}}, {{11}, output_data}, param, fp16_enable); + } } 
-TEST_F(TestStridedSliceOpenCL, 2D) { +TEST_F(TestOpenCL_StridedSlice, 2D) { float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - float expect_data[] = {11, 14}; - auto *param = GetStridedSliceParameter({1, 2}, {3, 8}, {2, 3}); - TestMain({{{4, 9}, input_data, Tensor::Category::VAR}}, {{1, 2}, expect_data}, param, false); + float output_data[] = {11, 14}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({1, 2}, {3, 8}, {2, 3}); + TestMain({{{4, 9}, input_data, VAR}}, {{1, 2}, output_data}, param, fp16_enable); + } } -TEST_F(TestStridedSliceOpenCL, 3D) { +TEST_F(TestOpenCL_StridedSlice, 3D) { float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - float expect_data[] = {11, 14}; - auto *param = GetStridedSliceParameter({0, 1, 2}, {1, 3, 8}, {1, 2, 3}); - TestMain({{{1, 4, 9}, input_data, Tensor::Category::VAR}}, {{1, 1, 2}, expect_data}, param, false); + float output_data[] = {11, 14}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({0, 1, 2}, {1, 3, 8}, {1, 2, 3}); + TestMain({{{1, 4, 9}, input_data, VAR}}, {{1, 1, 2}, output_data}, param, fp16_enable); + } } -TEST_F(TestStridedSliceOpenCL, 4D) { +TEST_F(TestOpenCL_StridedSlice, 4D) { float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - float expect_data0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + float output_data0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - auto *param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); - TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, 
param, false); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, VAR}}, {{2, 2, 3, 3}, output_data0}, param, fp16_enable); + } - param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); - TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, true); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, VAR}}, {{2, 2, 3, 3}, output_data0}, param, fp16_enable); + } - float expect_data1[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); - TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 3, 3}, expect_data1}, param, false); + float output_data1[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({1, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, VAR}}, {{1, 2, 3, 3}, output_data1}, param, fp16_enable); + } - float expect_data2[] = {27, 28, 29, 30, 31, 32, 33, 34, 35}; - param = GetStridedSliceParameter({1, 1, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); - TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 3, 3}, expect_data2}, param, false); + float output_data2[] = {27, 28, 29, 30, 31, 32, 33, 34, 35}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({1, 1, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, VAR}}, {{1, 1, 3, 3}, output_data2}, param, fp16_enable); + } - float expect_data3[] = {33, 34, 35}; - param = GetStridedSliceParameter({1, 1, 2, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); - TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 3}, expect_data3}, param, 
false); + float output_data3[] = {33, 34, 35}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({1, 1, 2, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, VAR}}, {{1, 1, 1, 3}, output_data3}, param, fp16_enable); + } - float expect_data4[] = {34}; - param = GetStridedSliceParameter({1, 1, 2, 1}, {2, 2, 3, 2}, {1, 1, 1, 1}); - TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 1}, expect_data4}, param, false); + float output_data4[] = {34}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({1, 1, 2, 1}, {2, 2, 3, 2}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, VAR}}, {{1, 1, 1, 1}, output_data4}, param, fp16_enable); + } } -TEST_F(TestStridedSliceOpenCL, 4D_stride2) { +TEST_F(TestOpenCL_StridedSlice, 4D_stride2) { float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - float expect_data[] = {13, 14, 31, 32}; - auto *param = GetStridedSliceParameter({0, 1, 1, 1}, {1, 4, 3, 3}, {2, 2, 2, 1}); - TestMain({{{1, 4, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); + float output_data[] = {13, 14, 31, 32}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({0, 1, 1, 1}, {1, 4, 3, 3}, {2, 2, 2, 1}); + TestMain({{{1, 4, 3, 3}, input_data, VAR}}, {{1, 2, 1, 2}, output_data}, param, fp16_enable); + } } -TEST_F(TestStridedSliceOpenCL, 4D_to_3D) { +TEST_F(TestOpenCL_StridedSlice, 4D_to_3D) { float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; - float expect_data[] = {18, 20, 21, 23, 27, 29, 30, 32}; - auto *param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 2, 3}, {1, 1, 1, 2}); - TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 2}, expect_data}, param, false); + float output_data[] = 
{18, 20, 21, 23, 27, 29, 30, 32}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({1, 0, 0, 0}, {2, 2, 2, 3}, {1, 1, 1, 2}); + TestMain({{{2, 2, 3, 3}, input_data, VAR}}, {{2, 2, 2}, output_data}, param, fp16_enable); + } } -TEST_F(TestStridedSliceOpenCL, In1D_OutOfRangeBeginNegativeStride) { +TEST_F(TestOpenCL_StridedSlice, In1D_OutOfRangeBeginNegativeStride) { float input_data[] = {1, 2, 3, 4}; - float expect_data[] = {4, 3, 2}; - auto *param = GetStridedSliceParameter({5}, {0}, {-1}); - TestMain({{{4}, input_data, Tensor::Category::VAR}}, {{3}, expect_data}, param, false); + float output_data[] = {4, 3, 2}; + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({5}, {0}, {-1}); + TestMain({{{4}, input_data, VAR}}, {{3}, output_data}, param, fp16_enable); + } } -TEST_F(TestStridedSliceOpenCL, tflite_cpu) { +TEST_F(TestOpenCL_StridedSlice, test0) { std::vector values(32768); for (int i = 0; i < values.size(); ++i) { values[i] = i % 1000; @@ -290,28 +311,30 @@ TEST_F(TestStridedSliceOpenCL, tflite_cpu) { auto &name = std::get<0>(case_); auto &input_shape = std::get<1>(case_); auto &output_shape = std::get<2>(case_); - auto &input_data = std::get<3>(case_); - auto &expect_data = std::get<4>(case_); + auto input_data = std::get<3>(case_).data(); + auto output_data = std::get<4>(case_).data(); auto &begin = std::get<5>(case_); auto &end = std::get<6>(case_); auto &stride = std::get<7>(case_); - std::cout << name << std::endl; - auto *param = GetStridedSliceParameter(begin, end, stride); - TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, - false); - param = GetStridedSliceParameter(begin, end, stride); - TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, - true); + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(begin, end, stride); + TestMain({{input_shape, input_data, VAR}}, 
{output_shape, output_data}, param, fp16_enable); + } } } -TEST_F(TestStridedSliceOpenCL, tflite_opencl) { - float input_data[] = {0.1f, 0.2f, 0.3f, 0.4, 1.1f, 1.2f, 1.3f, 1.4, 10.1f, 10.2f, 10.3f, 10.4, - 11.1f, 11.2f, 11.3f, 11.4, 20.1f, 20.2f, 20.3f, 20.4, 21.1f, 21.2f, 21.3f, 21.4}; - float expect_data[] = {10.2, 10.4, 20.2, 20.4}; - auto *param = GetStridedSliceParameter({0, 1, 0, 1}, {1, 3, 2, 4}, {1, 1, 2, 2}); - TestMain({{{1, 3, 2, 4}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); +TEST_F(TestOpenCL_StridedSlice, test1) { + float input_data[] = {0.1, 0.2, 0.3, 0.4, 1.1, 1.2, 1.3, 1.4, 10.1, 10.2, 10.3, 10.4, + 11.1, 11.2, 11.3, 11.4, 20.1, 20.2, 20.3, 20.4, 21.1, 21.2, 21.3, 21.4}; + float output_data[] = {10.2, 10.4, 20.2, 20.4}; + + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter({0, 1, 0, 1}, {1, 3, 2, 4}, {1, 1, 2, 2}); + TestMain({{{1, 3, 2, 4}, input_data, VAR}}, {{1, 2, 1, 2}, output_data}, param, fp16_enable, + fp16_enable ? 
1e-2 : 1e-9); + } } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc index f5f3259860..b61c701afb 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/to_format_tests.cc @@ -22,8 +22,8 @@ #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" #include "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h" -namespace mindspore { -class TestToFormatOpenCL : public mindspore::CommonTest { +namespace mindspore::lite::opencl::test { +class TestToFormatOpenCL : public CommonTest { public: TestToFormatOpenCL() {} }; @@ -103,4 +103,4 @@ TEST_F(TestToFormatOpenCL, ToFormatNHWC2NCHW) { ASSERT_EQ(0, CompareOutputData(output_data, correct_data, h * w * c, 0.00001)); MS_LOG(INFO) << "Test TransposeFp32 passed"; } -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc index 90719a1e20..104605962f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/transpose_tests.cc @@ -13,153 +13,57 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" -#include "common/common_test.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +#include "ut/src/runtime/kernel/opencl/common.h" +#include "nnacl/transpose.h" -namespace mindspore { -class TestTransposeOpenCL : public mindspore::CommonTest { - public: - TestTransposeOpenCL() {} -}; +namespace mindspore::lite::opencl::test { -void RunTestTranspose(const std::vector &shape, void *input_data, void *output_data, bool enable_fp16) { - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - size_t dtype_size = enable_fp16 ? sizeof(float16_t) : sizeof(float); - ocl_runtime->SetFp16Enable(enable_fp16); - auto param = static_cast(malloc(sizeof(TransposeParameter))); - if (param == nullptr) { - MS_LOG(ERROR) << "param_ptr create error."; - return; - } - param->num_axes_ = 4; - param->perm_[0] = shape[3]; - param->perm_[1] = shape[4]; - param->perm_[2] = shape[5]; - param->perm_[3] = shape[6]; - auto allocator = ocl_runtime->GetAllocator(); - int h = shape[0]; - int w = shape[1]; - int c = shape[2]; - std::vector input_shape = {1, h, w, c}; - auto tensor_x_ptr = std::make_unique(TypeId(enable_fp16 ? kNumberTypeFloat16 : kNumberTypeFloat32), - input_shape, schema::Format_NHWC); - auto tensor_x = tensor_x_ptr.get(); - if (tensor_x == nullptr) { - MS_LOG(ERROR) << "tensor_x create error."; - return; - } - std::vector out_shape = {input_shape[param->perm_[0]], input_shape[param->perm_[1]], - input_shape[param->perm_[2]], input_shape[param->perm_[3]]}; - auto tensor_out_ptr = std::make_unique(TypeId(enable_fp16 ? 
kNumberTypeFloat16 : kNumberTypeFloat32), - out_shape, schema::Format_NHWC); - auto tensor_out = tensor_out_ptr.get(); - if (tensor_out == nullptr) { - MS_LOG(ERROR) << "tensor_out create error."; - return; - } - std::vector inputs{tensor_x}; - std::vector outputs{tensor_out}; - auto arith_kernel = kernel::OpenCLKernelCreator( - inputs, outputs, reinterpret_cast(param), nullptr, kernel::KernelKey(), nullptr); - if (arith_kernel == nullptr) { - MS_LOG(ERROR) << "arith_kernel create error."; - return; - } - - inputs[0]->MallocData(allocator); +class TestOpenCL_Transpose : public CommonTest {}; - std::vector kernels{arith_kernel}; - auto pGraph_ptr = std::make_unique(inputs, outputs, kernels, kernels, kernels); - auto pGraph = pGraph_ptr.get(); - if (pGraph == nullptr) { - MS_LOG(ERROR) << "pGraph create error."; - return; +namespace { +// PrimitiveType_Transpose: src/ops/populate/transpose_populate.cc +// src/ops/populate/nchw2nhwc_populate.cc +// src/ops/populate/nhwc2nchw_populate.cc +OpParameter *CreateParameter(const std::vector &perm) { + auto *param = test::CreateParameter(schema::PrimitiveType_Transpose); + param->num_axes_ = perm.size(); + for (int i = 0; i < perm.size(); ++i) { + param->perm_[i] = perm[i]; } - pGraph->Init(); - memcpy(inputs[0]->MutableData(), input_data, h * w * c * dtype_size); - pGraph->Run(); + return reinterpret_cast(param); +} +} // namespace - if (enable_fp16) { - CompareOutput(outputs[0]->MutableData(), output_data, h * w * c, static_cast(1e-3), 2e-2); - } else { - CompareOutput(outputs[0]->MutableData(), output_data, h * w * c, static_cast(1e-5)); +TEST_F(TestOpenCL_Transpose, NHWC2NCHW) { + std::vector input_shape = {1, 2, 2, 3}; + std::vector perm = {0, 3, 1, 2}; + std::vector output_shape; + for (int axis : perm) { + output_shape.push_back(input_shape[axis]); } + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + float output_data[] = {0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11}; - for (auto t : inputs) { - 
t->set_data(nullptr); - } - for (auto t : outputs) { - t->set_data(nullptr); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(perm); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); } - - MS_LOG(INFO) << "Test TransposeFp32 passed"; -} - -TEST_F(TestTransposeOpenCL, TransposeNHWC2NCHWFp32) { - int h = 2; - int w = 2; - int c = 3; - int perm0 = 0; - int perm1 = 3; - int perm2 = 1; - int perm3 = 2; - std::vector shape = {h, w, c, perm0, perm1, perm2, perm3}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {0.0f, 3.0f, 6.0f, 9.0f, 1.0f, 4.0f, 7.0f, 10.0f, 2.0f, 5.0f, 8.0f, 11.0f}; - - RunTestTranspose(shape, input_data.data(), output_data.data(), false); -} - -TEST_F(TestTransposeOpenCL, TransposeNHWC2NCHWFp16) { - int h = 2; - int w = 2; - int c = 3; - int perm0 = 0; - int perm1 = 3; - int perm2 = 1; - int perm3 = 2; - std::vector shape = {h, w, c, perm0, perm1, perm2, perm3}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {0.0f, 3.0f, 6.0f, 9.0f, 1.0f, 4.0f, 7.0f, 10.0f, 2.0f, 5.0f, 8.0f, 11.0f}; - - RunTestTranspose(shape, input_data.data(), output_data.data(), true); } -TEST_F(TestTransposeOpenCL, TransposeNCHW2NHWCFp32) { - int h = 2; - int w = 2; - int c = 3; - int perm0 = 0; - int perm1 = 2; - int perm2 = 3; - int perm3 = 1; - std::vector shape = {h, w, c, perm0, perm1, perm2, perm3}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {0.0f, 6.0f, 1.0f, 7.0f, 2.0f, 8.0f, 3.0f, 9.0f, 4.0f, 10.0f, 5.0f, 11.0f}; +TEST_F(TestOpenCL_Transpose, NCHW2NHWC) { + std::vector input_shape = {1, 2, 2, 3}; + std::vector perm = {0, 2, 3, 1}; + std::vector output_shape; + for (int axis : perm) { + output_shape.push_back(input_shape[axis]); + } + float input_data[] 
= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + float output_data[] = {0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11}; - RunTestTranspose(shape, input_data.data(), output_data.data(), false); + for (auto fp16_enable : {false, true}) { + auto *param = CreateParameter(perm); + TestMain({{input_shape, input_data, VAR}}, {output_shape, output_data}, param, fp16_enable); + } } -TEST_F(TestTransposeOpenCL, TransposeNCHW2NHWCFp16) { - int h = 2; - int w = 2; - int c = 3; - int perm0 = 0; - int perm1 = 2; - int perm2 = 3; - int perm3 = 1; - std::vector shape = {h, w, c, perm0, perm1, perm2, perm3}; - std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f}; - std::vector output_data = {0.0f, 6.0f, 1.0f, 7.0f, 2.0f, 8.0f, 3.0f, 9.0f, 4.0f, 10.0f, 5.0f, 11.0f}; - - RunTestTranspose(shape, input_data.data(), output_data.data(), true); -} -} // namespace mindspore +} // namespace mindspore::lite::opencl::test diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc deleted file mode 100644 index 1aa68f4363..0000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include "common/common_test.h" -#include "src/kernel_registry.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" - -using mindspore::kernel::LiteKernel; -using mindspore::kernel::SubGraphOpenCLKernel; -using mindspore::lite::KernelRegistry; -using mindspore::lite::Tensor; -using mindspore::schema::Format::Format_NHWC; - -namespace mindspore { - -void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) { - if (file_path.empty()) { - memset(dst, 0x00, dst_size); - } else { - auto src_data = reinterpret_cast(mindspore::lite::ReadFile(file_path.c_str(), &dst_size)); - if (src_data != nullptr) { - memcpy(dst, src_data, dst_size); - } else { - MS_LOG(ERROR) << "read file empty."; - } - } -} - -void TestMain(const std::vector, float *, Tensor::Category>> &input_infos, - std::tuple, float *> output_info, OpParameter *op_parameter, bool fp16_enable, - float atol, bool print_output) { - MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator"; - auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); - auto ocl_runtime = runtime_wrapper.GetInstance(); - EXPECT_TRUE(ocl_runtime->Init() == RET_OK); - ocl_runtime->SetFp16Enable(fp16_enable); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(DEBUG) << "create Tensors & init weight data"; - std::vector tensors; - std::vector kernel_inputs; - std::vector subgraph_inputs; - std::map subgraph_inputs_data; - for (auto input_info : input_infos) { - const std::vector &shape = std::get<0>(input_info); - auto *input_data = std::get<1>(input_info); - const Tensor::Category category = std::get<2>(input_info); - tensors.emplace_back(kNumberTypeFloat32, shape, Format_NHWC, category); - auto *new_tensor = &tensors.back(); - kernel_inputs.push_back(new_tensor); - if (category != Tensor::Category::VAR) { - memcpy(new_tensor->MutableData(), input_data, new_tensor->Size()); - } else { 
- subgraph_inputs.push_back(new_tensor); - subgraph_inputs_data[new_tensor] = input_data; - } - } - const std::vector &output_shape = std::get<0>(output_info); - float *expect_data = std::get<1>(output_info); - auto output = Tensor(kNumberTypeFloat32, output_shape, Format_NHWC, Tensor::Category::VAR); - - MS_LOG(DEBUG) << "create OpenCL Kernel"; - auto primitive_type = static_cast(op_parameter->type_); - kernel::KernelKey key{kernel::kGPU, kernel_inputs.front()->data_type(), primitive_type}; - auto creator = KernelRegistry::GetInstance()->GetCreator(key); - if (creator == nullptr) { - std::cerr << "get kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; - free(op_parameter); - FAIL(); - } - auto *kernel = creator(kernel_inputs, {&output}, op_parameter, nullptr, key, nullptr); - if (kernel == nullptr) { - std::cerr << "call kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; - free(op_parameter); - FAIL(); - } - - MS_LOG(DEBUG) << "create SubGraph & init input data"; - std::vector kernels{kernel}; - auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels); - if (sub_graph == nullptr) { - return; - } - for (auto input : subgraph_inputs) { - EXPECT_TRUE(input->MallocData(allocator) == RET_OK); - } - EXPECT_TRUE(sub_graph->Init() == RET_OK); - for (auto input : subgraph_inputs) { - memcpy(input->data_c(), subgraph_inputs_data[input], input->Size()); - } - - MS_LOG(DEBUG) << "run SubGraph & compare result"; - EXPECT_TRUE(sub_graph->Run() == RET_OK); - if (print_output) { - for (int i = 0; i < output.ElementsNum(); ++i) { - printf("%d: expect=%.3f output=%.3f\n", i, expect_data[i], reinterpret_cast(output.data_c())[i]); - } - } - CommonTest::CompareOutputData(reinterpret_cast(output.data_c()), expect_data, output.ElementsNum(), atol); - - MS_LOG(DEBUG) << "release resources"; - delete sub_graph; -} - -} // namespace mindspore 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h deleted file mode 100644 index dadcbd00a9..0000000000 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ -#define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ - -#include -#include -#include -#include -#include -#include "mindspore/lite/src/tensor.h" -#include "mindspore/lite/src/common/file_utils.h" - -using mindspore::lite::Tensor; - -namespace mindspore { - -void LoadTestData(void *dst, size_t dst_size, const std::string &file_path); - -template -void CompareOutput(void *output, void *expect, size_t elem_num, T atol, float rtol = 1e-5) { - T *output_data = reinterpret_cast(output); - T *expect_data = reinterpret_cast(expect); - - std::cout << std::setprecision(5) << std::setiosflags(std::ios::fixed) << std::setw(7); - std::cout << "output[0:12]:"; - for (int i = 0; i < 12 && i < elem_num; i++) { - std::cout << output_data[i] << " "; - } - std::cout << std::endl; - std::cout << "expect[0:12]:"; - for (int i = 0; i < 12 && i < elem_num; i++) { - std::cout << expect_data[i] << " "; - } - std::cout << std::endl; - for (int i = 0; i < elem_num; ++i) { - auto left = static_cast(std::fabs(output_data[i] - expect_data[i])); - auto 
right = static_cast(atol + rtol * std::fabs(expect_data[i])); - if (left > right) { - std::cout << "error at idx[" << i << "] expect=" << expect_data[i] << " output=" << output_data[i] << std::endl; - } - ASSERT_LE(left, right); - } - std::cout << "compare success!" << std::endl; -} - -template -void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T atol, float rtol = 1e-5) { - size_t output_size; - auto expect_data = mindspore::lite::ReadFile(file_path.c_str(), &output_size); - CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol); -} - -void TestMain(const std::vector, float *, Tensor::Category>> &input_infos, - std::tuple, float *> output_info, OpParameter *op_parameter, bool fp16_enable = false, - float atol = 10e-9, bool print_output = false); - -} // namespace mindspore - -#endif // TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_