diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl deleted file mode 100644 index 50492b36d9..0000000000 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl +++ /dev/null @@ -1,146 +0,0 @@ -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -#define INT2 int2 -#define INT4 int4 -__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; -__kernel void slice_NHWC4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape, - INT4 begin, INT2 sharedNoUpdiv) { - int X = get_global_id(1); // H - int Y = get_global_id(2); // W - if (X >= out_shape.y || Y >= out_shape.z) { - return; - } - FLT4 result; - if (sharedNoUpdiv.x % 4 == 0) { - for (int i = 0; i < out_shape.w; i++) { - result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (i + begin.w), (X + begin.y))); - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i, (X)), result); - } - } else { - int begin_postion = sharedNoUpdiv.x % 4; - FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + begin.w, (X + begin.y))); - if (begin_postion == 1) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y))); - result.x = first.y; - result.y = first.z; - result.z = first.w; - result.w = second.x; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result); - first.y = second.y; - first.z = second.z; - first.w = second.w; - } - } else if (begin_postion == 2) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y))); - result.x = first.z; - result.y = first.w; - result.z = second.x; - result.w = second.y; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result); - first.z = second.z; - first.w = second.w; - } - } else { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y))); - result.x = first.w; - result.y = second.x; - result.z = second.y; - result.w = second.z; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result); - first.w = second.w; - } - } - } - // judge the line of size - int size = sharedNoUpdiv.y % 4; - FLT4 result_fill0; - if (size == 1) { - result_fill0.x = result.x; - result_fill0.y = 0; - result_fill0.z = 0; - result_fill0.w = 0; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0); - } else if (size == 2) { - result_fill0.x = result.x; - result_fill0.y = result.y; - result_fill0.z = 0; - result_fill0.w = 0; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0); - } else if (size == 3) { - result_fill0.x = result.x; - result_fill0.y = result.y; - result_fill0.z = result.z; - result_fill0.w = 0; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0); - } -} -__kernel void slice_NC4HW4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape, - INT4 begin, INT2 sharedNoUpdiv) { - int X = get_global_id(1); // H - int Y = get_global_id(2); // W - if (X >= out_shape.y || Y >= out_shape.z) { - return; - } - FLT4 result; - if (sharedNoUpdiv.x % 4 == 0) { - for (int i = 0; i < out_shape.w; i++) { - result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - 
WRITE_IMAGE(output, (INT2)((Y), (i * out_shape.y + X)), result); - } - } else { - int begin_postion = sharedNoUpdiv.x % 4; - FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (begin.w) * input_shape.y + (X + begin.y))); - if (begin_postion == 1) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - result.x = first.y; - result.y = first.z; - result.z = first.w; - result.w = second.x; - WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); - first.y = second.y; - first.z = second.z; - first.w = second.w; - } - } else if (begin_postion == 2) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - result.x = first.z; - result.y = first.w; - result.z = second.x; - result.w = second.y; - WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); - first.z = second.z; - first.w = second.w; - } - } else { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - result.x = first.w; - result.y = second.x; - result.z = second.y; - result.w = second.z; - WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); - first.w = second.w; - } - } - } - // judge the line of size - int size = sharedNoUpdiv.y % 4; - FLT4 result_fill0 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); - if (size == 1) { - result_fill0.x = result.x; - WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); - } else if (size == 2) { - result_fill0.x = result.x; - result_fill0.y = result.y; - WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); - } else if (size == 3) { - result_fill0.x = result.x; - result_fill0.y = result.y; - result_fill0.z = result.z; - WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); - } -} diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl new file mode 100644 index 0000000000..b1f0c24aae --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl @@ -0,0 +1,59 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + +__kernel void strided_slice(__read_only image2d_t input, __write_only image2d_t output, int4 input_shape, + int4 output_shape, int2 io_slices, int4 begin, int4 stride, int4 size) { + int IN = input_shape.x, IH = input_shape.y, IW = input_shape.z, CI = input_shape.w; + int ON = output_shape.x, OH = output_shape.y, OW = output_shape.z, CO = output_shape.w; + int CI_SLICES = io_slices.x, CO_SLICES = io_slices.y; + int on_oh = get_global_id(0); + int ow = get_global_id(1); + int co_slice = get_global_id(2); + int on = on_oh / OH; + int oh = on_oh % OH; + if (on >= ON || oh >= OH || ow >= OW || co_slice >= CO_SLICES) { + return; + } + + FLT tmp[4]; + for (int i = 0; i < 4; ++i) { + // output_shape idx -> size idx. 
because squeeze(output_shape)=squeeze(size) + // for example: + // python code: B = A[1, 1:16, 2:16, 3:16] + // input_shape = [16, 16, 16, 16] + // begin = [ 1, 1, 2, 3] + // end = [ 2, 16, 16, 16] + // stride = [ 1, 1, 1, 1] + // size = [ 1, 15, 14, 13] = ceil((end - begin) / stride) + // output_shape = [ 15, 14, 13] + int idx = ((on * OH + oh) * OW + ow) * CO + co_slice * 4 + i; + int co_ = idx % size.w; + idx /= size.w; + int ow_ = idx % size.z; + idx /= size.z; + int oh_ = idx % size.y; + idx /= size.y; + int on_ = idx; + + int in = begin.x + stride.x * on_; + int ih = begin.y + stride.y * oh_; + int iw = begin.z + stride.z * ow_; + int ci = begin.w + stride.w * co_; + + FLT4 src = READ_IMAGE(input, smp_none, (int2)(iw * CI_SLICES + ci / 4, in * IH + ih)); + int offset = ci % 4; + if (offset == 0) { + tmp[i] = src.x; + } else if (offset == 1) { + tmp[i] = src.y; + } else if (offset == 2) { + tmp[i] = src.z; + } else { + tmp[i] = src.w; + } + } + + FLT4 out = (FLT4)(tmp[0], tmp[1], tmp[2], tmp[3]); + WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, on_oh), out); +} diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc deleted file mode 100644 index c8d6b009dc..0000000000 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include "src/kernel_registry.h" -#include "src/runtime/kernel/opencl/kernel/slice.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "src/runtime/kernel/opencl/cl/slice.cl.inc" - -using mindspore::kernel::KERNEL_ARCH::kGPU; -using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_OK; -using mindspore::schema::PrimitiveType_Slice; - -namespace mindspore::kernel { - -int SliceOpenCLKernel::Init() { - std::set build_options; - std::string source = slice_source; - std::string program_name = "slice"; - std::string kernel_name = "slice_NHWC4"; - ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); - MS_LOG(DEBUG) << kernel_name << " Init Done!"; - return RET_OK; -} - -void SlcieGetWorkGroup(const std::vector &global, std::vector *local, int max_size) { - const int max_divider = 8; - const int max_x = 4, max_y = 8; - int x = std::min(GetMaxDivisorStrategy1(global[0], max_divider), max_x); - int yz = max_size / x; - int y = std::min(std::min(GetMaxDivisorStrategy1(global[1], max_divider), yz), max_y); - int z = std::min(yz / y, static_cast(UP_DIV(global[2], 2))); - - local->clear(); - local->push_back(x); - local->push_back(y); - local->push_back(z); -} - -int SliceOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running! 
"; - auto param = reinterpret_cast(this->op_parameter_); - auto input_shape = in_tensors_[0]->shape(); - cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)}; - cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)}; - cl_int4 begin_ = {param->begin_[0], param->begin_[1], param->begin_[2], param->begin_[3] / 4}; - cl_int2 sharedNoUpdiv = {param->begin_[3], param->size_[3]}; - uint32_t OH = param->size_[1]; - uint32_t OW = param->size_[2]; - - const std::vector &max_global = ocl_runtime_->GetWorkItemSize(); - std::vector local = {1, 1, 1}; // init local - std::vector global = {1, OH, OW}; - SlcieGetWorkGroup(global, &local, max_global[0]); - int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv); - ocl_runtime_->RunKernel(kernel_, global, local, nullptr); - - return RET_OK; -} - -kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const lite::InnerContext *ctx, const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { - auto *kernel = new (std::nothrow) SliceOpenCLKernel(opParameter, inputs, outputs); - if (kernel == nullptr) { - MS_LOG(ERROR) << " new SliceOpenCLKernel failed "; - free(opParameter); - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << " Init kernel failed, name: Slice "; - delete kernel; - return nullptr; - } - return kernel; -} - -REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLSliceKernelCreator); -REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLSliceKernelCreator); -} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc new file mode 100644 index 0000000000..2c0f1e3efb --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -0,0 +1,192 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include +#include "src/kernel_registry.h" +#include "src/runtime/kernel/opencl/kernel/strided_slice.h" +#include "src/runtime/kernel/opencl/utils.h" +#include "src/runtime/kernel/opencl/cl/strided_slice.cl.inc" +#include "nnacl/strided_slice.h" + +using mindspore::kernel::KERNEL_ARCH::kGPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Slice; +using mindspore::schema::PrimitiveType_StridedSlice; + +namespace mindspore::kernel { + +int SliceOpenCLKernel::CheckSpecs() { + const std::string kernel_name = op_parameter_->type_ == PrimitiveType_Slice ? "Slice" : "StridedSlice"; + if (in_tensors_.size() != 1) { + MS_LOG(ERROR) << kernel_name + " only supports 1 input Tensor."; + return RET_ERROR; + } + if (out_tensors_.size() != 1) { + MS_LOG(ERROR) << kernel_name + " only supports 1 output Tensor."; + return RET_ERROR; + } + auto in_ndim = in_tensors_.front()->shape().size(); + if (in_ndim == 0 || in_ndim > 4) { + MS_LOG(ERROR) << kernel_name + " only supports 1D-4D input tensor"; + return RET_ERROR; + } + auto out_ndim = out_tensors_.front()->shape().size(); + if (out_ndim > 4) { + MS_LOG(ERROR) << kernel_name + " only supports 0D-4D output tensor"; + return RET_ERROR; + } + if (InitConstArgs() != RET_OK) { + MS_LOG(ERROR) << "call SliceOpenCLKernel::InitConstArgs() failed"; + return RET_ERROR; + } + return RET_OK; +} + +int SliceOpenCLKernel::Prepare() { + std::set build_options; + std::string program_name = "strided_slice"; + ocl_runtime_->LoadSource(program_name, strided_slice_source); + ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice", build_options); + SetConstArgs(); + SetGlobalLocal(); + return RET_OK; +} + +int SliceOpenCLKernel::InitConstArgs() { + auto input_info = Image2DInfo(in_tensors_.front()); + auto output_info = Image2DInfo(out_tensors_.front()); + input_shape_ = {static_cast(input_info.N), static_cast(input_info.H), + static_cast(input_info.W), static_cast(input_info.C)}; + output_shape_ = {static_cast(output_info.N), static_cast(output_info.H), + static_cast(output_info.W), static_cast(output_info.C)}; + io_slices_ = {static_cast(input_info.Slice), static_cast(output_info.Slice)}; + + if (op_parameter_->type_ == PrimitiveType_Slice) { + auto param = reinterpret_cast(op_parameter_); + Broadcast2GpuShape(param->begin_, begin_.s, param->param_length_, 0); + Broadcast2GpuShape(param->size_, size_.s, param->param_length_, -1); + for (int i = 0; i < 4; ++i) { + if (begin_.s[i] < 0) { + begin_.s[i] += input_shape_.s[i]; + } + if (begin_.s[i] < 0 || begin_.s[i] >= input_shape_.s[i]) { + MS_LOG(ERROR) << "Slice kernel only supports 0<=begin0 but size[i]=" << size_.s[i]; + return RET_ERROR; + } + if (size_.s[i] == -1 || begin_.s[i] + size_.s[i] > input_shape_.s[i]) { + size_.s[i] = input_shape_.s[i] - begin_.s[i]; + } + } + } else { + auto param = reinterpret_cast(op_parameter_); + cl_int4 end = input_shape_; + Broadcast2GpuShape(param->begins_, begin_.s, param->num_axes_, 0); + Broadcast2GpuShape(param->strides_, stride_.s, param->num_axes_, 1); + Broadcast2GpuShape(param->ends_, end.s, param->num_axes_); + + for (int i = 0; i < 4; ++i) { + // begin is negative + if (begin_.s[i] < 0) { + begin_.s[i] += input_shape_.s[i]; + } + // avoid begin is out of range + begin_.s[i] = std::clamp(begin_.s[i], 0, input_shape_.s[i] - 1); + // end is negative + if (end.s[i] < 0) { + end.s[i] += input_shape_.s[i]; + } + // avoid end is out 
of range + end.s[i] = std::clamp(end.s[i], -1, input_shape_.s[i]); + + // check stride begin end + if (stride_.s[i] > 0) { + if (begin_.s[i] >= end.s[i]) { + MS_LOG(ERROR) << "StridedSlice kernel only supports begin_0"; + return RET_ERROR; + } + } else if (stride_.s[i] < 0) { + if (begin_.s[i] <= end.s[i]) { + MS_LOG(ERROR) << "StridedSlice kernel only supports begin_>end when stride<0"; + return RET_ERROR; + } + } else { + MS_LOG(ERROR) << "StridedSlice kernel only supports stride!=0"; + return RET_ERROR; + } + size_.s[i] = std::ceil(static_cast(end.s[i] - begin_.s[i]) / static_cast(stride_.s[i])); + } + } + + // check size + std::vector shape_not_1; + std::vector size_not_1; + std::copy_if(out_tensors_.front()->shape().begin(), out_tensors_.front()->shape().end(), shape_not_1.begin(), + [](int x) { return x > 1; }); + std::copy_if(size_.s, size_.s + 4, size_not_1.begin(), [](int x) { return x > 1; }); + if (shape_not_1 != size_not_1) { + MS_LOG(ERROR) << "Slice/StridedSlice kernel output shape infer error"; + return RET_ERROR; + } + return RET_OK; +} + +void SliceOpenCLKernel::SetConstArgs() { + int arg_cn = 2; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_); +} + +void SliceOpenCLKernel::SetGlobalLocal() { + auto output_info = Image2DInfo(out_tensors_.front()); + std::vector global = {output_info.N * output_info.H, output_info.W, output_info.Slice}; + + const int max_divider = 8; + auto max_work_group_size = ocl_runtime_->DeviceMaxWorkGroupSize(); + size_t local_c = GetMaxDivisorStrategy0(global[2], max_divider); + size_t local_hw = max_work_group_size / local_c; + size_t local_h = std::min(UP_DIV(global[0], 2), local_hw); + size_t local_w = std::min(local_hw / local_h, global[1]); + std::vector local = {local_h, local_w, local_c}; + AlignGlobalLocal(global, local); +} + +int SliceOpenCLKernel::Run() { + MS_LOG(DEBUG) << this->name() << " Running! "; + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + return RET_OK; +} + +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, OpenCLKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h similarity index 72% rename from mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h rename to mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h index 44fb3871d6..f29c7f19b2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_ #include #include "src/runtime/kernel/opencl/opencl_kernel.h" @@ -31,12 +31,23 @@ class SliceOpenCLKernel : public OpenCLKernel { ~SliceOpenCLKernel() override = default; - int Init() override; - + int Prepare() override; int Run() override; + int CheckSpecs() override; + void SetConstArgs() override; + void SetGlobalLocal() override; + private: + int InitConstArgs(); + cl::Kernel kernel_; + cl_int4 input_shape_{}; + cl_int4 output_shape_{}; + cl_int2 io_slices_{}; + cl_int4 begin_{}; + cl_int4 stride_{{1, 1, 1, 1}}; + cl_int4 size_{}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 8ab9120485..8421cb5269 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -34,33 +34,67 @@ struct OpenCLToFormatParameter { lite::opencl::MemType out_mem_type{lite::opencl::MemType::IMG}; }; +template +void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num) { + auto *N = dst; + auto *H = dst + 1; + auto *W = dst + 2; + auto *C = dst + 3; + if (src_num == 1) { + *N = src[0]; + } else if (src_num == 2) { + *N = src[0]; + *C = src[1]; + } else if (src_num == 3) { + *N = src[0]; + *W = src[1]; + *C = src[2]; + } else if (src_num == 4) { + *N = src[0]; + *H = src[1]; + *W = src[2]; + *C = src[3]; + } else if (src_num >= 5) { + MS_LOG(ERROR) << "GPU doesn't support ndim>=" << src_num; + } +} + +template +void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num, DstT default_value) { + for (int i = 0; i < 4; ++i) { + dst[i] = default_value; + } + Broadcast2GpuShape(src, dst, src_num); +} + struct Image2DInfo { explicit Image2DInfo(const lite::Tensor *tensor) { if (tensor == nullptr) { return; } - auto shape = tensor->shape(); - if (shape.size() == 1) { + auto ndim = shape.size(); + if (ndim == 1) { N = shape[0]; - } else if (shape.size() == 2) { + } else if (ndim == 2) { N = shape[0]; C = shape[1]; - } else if (shape.size() == 3) { + } else if (ndim == 3) { N = shape[0]; W = shape[1]; C = shape[2]; - } else if (shape.size() == 4) { + } else if (ndim == 4) { N = shape[0]; H = shape[1]; W = shape[2]; C = shape[3]; - } else if (shape.size() >= 5) { - MS_LOG(ERROR) << "GPU dont't support Tensor with dim=" << shape.size(); + } else if (ndim >= 5) { + MS_LOG(ERROR) << "GPU doesn't support Tensor with ndim>=" << ndim; } + Slice = UP_DIV(C, C4NUM); + FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float); FLT4_size = FLT_size * 4; - Slice = UP_DIV(C, C4NUM); if (W * Slice <= MAX_IMAGE2D_SIZE) { height = N * H; width = W * Slice; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc index 454a319841..692ee9b073 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc @@ -13,357 +13,151 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" #include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h" +#include "nnacl/slice_parameter.h" +#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" namespace mindspore { -class TestSliceOpenCLfp32 : public mindspore::CommonTest { - public: - TestSliceOpenCLfp32() {} -}; -class TestSliceOpenCLfp16 : public mindspore::CommonTest { - public: - TestSliceOpenCLfp16() {} -}; -template -void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bound) { - for (size_t i = 0; i < size; i++) { - T abs = fabs(output_data[i] - correct_data[i]); - ASSERT_LE(abs, err_bound); - } -} - -TEST_F(TestSliceOpenCLfp32, Slicefp32CI) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 2, 2, 8}; - std::vector output_shape = {1, 2, 2, 5}; - std::vector begin = {0, 0, 0, 2}; - std::vector size = {1, 2, 2, 5}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, - 0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498, - -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366, - -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552, - 0.95056856, -0.06886655, 0.5316237, 0.05787678}; - float correct_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, - 0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151, - -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237}; - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - delete tensor_data; - MS_LOG(INFO) << " init tensor failed "; - return; - } - std::vector inputs = {tensor_data}; - std::vector outputs = {output_tensor}; +class TestSliceOpenCL : public mindspore::CommonTest {}; - MS_LOG(INFO) << "setting SliceParameter "; - auto param = reinterpret_cast(malloc(sizeof(SliceParameter))); +OpParameter *GetSliceParameter(const std::vector &begin, const std::vector &size) { + auto param = static_cast(malloc(sizeof(SliceParameter))); if (param == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - MS_LOG(INFO) << "new SliceParameter failed "; - return; + MS_LOG(ERROR) << "SliceParameter create error."; + return nullptr; } - for (int i = 0; i < input_shape.size(); i++) { + param->op_parameter_.type_ = schema::PrimitiveType_Slice; + param->param_length_ = begin.size(); + for (int i = 0; i < begin.size(); ++i) { param->begin_[i] = begin[i]; param->size_[i] = size[i]; } - - auto *slice_kernel = - new (std::nothrow) 
kernel::SliceOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (slice_kernel == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - MS_LOG(INFO) << "new kernel::slice_kernel failed "; - return; - } - slice_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{slice_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete slice_kernel; - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, sizeof(input_data)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; + return reinterpret_cast(param); } -TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 19, 19, 96}; - std::vector output_shape = {1, 10, 10, 13}; - std::vector begin = {0, 2, 3, 4}; - std::vector size = {1, 10, 10, 13}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - // get the input from .bin - size_t input_size, output_size; - std::string input_path = "./test_data/in_slicefp32.bin"; - std::string output_path = "./test_data/out_slicefp32.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - delete tensor_data; - MS_LOG(INFO) << " init tensor failed "; - return; - } - std::vector inputs = {tensor_data}; - std::vector outputs = {output_tensor}; - - MS_LOG(INFO) << "setting SliceParameter "; - auto param = reinterpret_cast(malloc(sizeof(SliceParameter))); - if (param == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - MS_LOG(INFO) << "new SliceParameter failed "; - return; - } - for (int i = 0; i < input_shape.size(); i++) { - param->begin_[i] = begin[i]; - param->size_[i] = size[i]; - } - - auto *slice_kernel = - new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (slice_kernel == nullptr) { - for (auto tensor 
: inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - MS_LOG(INFO) << "new kernel::slice_kernel failed "; - return; - } - slice_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{slice_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete slice_kernel; - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, input_size); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; +TEST_F(TestSliceOpenCL, 4D) { + float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, + 0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498, + -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366, + -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552, + 0.95056856, -0.06886655, 0.5316237, 0.05787678}; + float expect_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, + 0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151, + -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237}; + auto param = GetSliceParameter({0, 0, 0, 2}, {1, 2, 2, 5}); + TestMain({{{1, 2, 2, 8}, input_data, Tensor::Category::VAR}}, {{1, 2, 2, 5}, expect_data}, param, false); } -TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 25, 25, 48}; - std::vector output_shape = {1, 24, 24, 15}; - std::vector begin = {0, 1, 1, 7}; - std::vector size = {1, 24, 24, 15}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - // get the input from .bin - size_t input_size, output_size; - std::string input_path = "./test_data/in_slicefp16.bin"; - std::string output_path = "./test_data/out_slicefp16.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); - if 
(output_tensor == nullptr) { - delete tensor_data; - MS_LOG(INFO) << " init tensor failed "; - return; +TEST_F(TestSliceOpenCL, tflite_cpu) { + std::vector, std::vector, std::vector, std::vector, + std::vector, std::vector>> + cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {2}}, + {"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {1, 2}}, + {"In3D", + {2, 3, 2}, + {2, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0}, + {2, 3, 2}}, + {"InputFloat", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}}, + {"IndexInt64", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}}, + {"InputInteger1", + {3, 2, 3, 1}, + {1, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3}, + {1, 0, 0, 0}, + {1, 1, 3, 1}}, + {"InputInteger2", + {3, 2, 3, 1}, + {1, 2, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 4, 4, 4}, + {1, 0, 0, 0}, + {1, 2, 3, 1}}, + {"InputInteger3", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, 3, 1}}, + {"SizeMinus1", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"BeginNonZeroSizeMinus1Axis1", + {3, 3, 2, 1}, + {2, 2, 1, 1}, + {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, + {5, 6, 8, 9}, + {1, 1, 0, 0}, + {2, -1, 1, 1}}, + {"BeginNonZeroSizeMinus1Axis2", + {3, 2, 3, 1}, + {2, 1, 2, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 5, 5}, + {1, 0, 1, 0}, + {2, 1, -1, 1}}, + {"BeginNonZeroSizeMinus1Axis3", + {3, 1, 2, 3}, + {2, 1, 1, 2}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 5, 5}, + {1, 0, 0, 1}, + {2, 1, 1, -1}}, + {"SliceUint8", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"SliceInt8", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"SliceInt16", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"SliceInt64", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}}; + + for (auto &case_ : cases) { + auto &name = std::get<0>(case_); + auto &input_shape = std::get<1>(case_); + auto &output_shape = std::get<2>(case_); + auto &input_data = std::get<3>(case_); + auto &expect_data = std::get<4>(case_); + auto &begin = std::get<5>(case_); + auto &size = std::get<6>(case_); + + std::cout << name << std::endl; + auto *param = GetSliceParameter(begin, size); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + false); + param = GetSliceParameter(begin, size); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + true); } - std::vector inputs = {tensor_data}; - std::vector outputs = {output_tensor}; - - MS_LOG(INFO) << " setting SliceParameter "; - auto param = reinterpret_cast(malloc(sizeof(SliceParameter))); - if (param == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - MS_LOG(INFO) << " 
new SliceParameter failed "; - return; - } - for (int i = 0; i < input_shape.size(); i++) { - param->begin_[i] = begin[i]; - param->size_[i] = size[i]; - } - - auto *slice_kernel = - new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (slice_kernel == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - MS_LOG(INFO) << " new kernel::slice_kernel failed "; - return; - } - slice_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{slice_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete slice_kernel; - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, input_size); +} // namespace mindspore - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; -} } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc new file mode 100644 index 0000000000..4b4410841d --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc @@ -0,0 +1,317 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "common/common_test.h" +#include "nnacl/strided_slice.h" +#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" + +namespace mindspore { + +class TestStridedSliceOpenCL : public mindspore::CommonTest {}; + +OpParameter *GetStridedSliceParameter(const std::vector &begins, const std::vector &ends, + const std::vector &strides) { + auto param = static_cast(malloc(sizeof(StridedSliceParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "create StridedSliceParameter error."; + return nullptr; + } + param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + param->num_axes_ = begins.size(); + for (int i = 0; i < begins.size(); ++i) { + param->begins_[i] = begins[i]; + param->ends_[i] = ends[i]; + param->strides_[i] = strides[i]; + } + return reinterpret_cast(param); +} + +TEST_F(TestStridedSliceOpenCL, 1D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33}; + auto *param = GetStridedSliceParameter({3}, {36}, {3}); + TestMain({{{36}, input_data, Tensor::Category::VAR}}, {{11}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 2D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {11, 14}; + auto *param = GetStridedSliceParameter({1, 2}, {3, 8}, {2, 3}); + TestMain({{{4, 9}, input_data, Tensor::Category::VAR}}, {{1, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 3D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {11, 14}; + auto *param = GetStridedSliceParameter({0, 1, 2}, {1, 3, 8}, {1, 2, 3}); + TestMain({{{1, 4, 9}, input_data, Tensor::Category::VAR}}, {{1, 1, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 4D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + + float expect_data0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + auto *param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, false); + + param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, true); + + float expect_data1[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 3, 3}, expect_data1}, param, false); + + float expect_data2[] = {27, 28, 29, 30, 31, 32, 33, 34, 35}; + param = GetStridedSliceParameter({1, 1, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 3, 3}, expect_data2}, param, false); + + float expect_data3[] = {33, 34, 35}; + param = GetStridedSliceParameter({1, 1, 2, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 3}, 
expect_data3}, param, false); + + float expect_data4[] = {34}; + param = GetStridedSliceParameter({1, 1, 2, 1}, {2, 2, 3, 2}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 1}, expect_data4}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 4D_stride2) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {13, 14, 31, 32}; + auto *param = GetStridedSliceParameter({0, 1, 1, 1}, {1, 4, 3, 3}, {2, 2, 2, 1}); + TestMain({{{1, 4, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 4D_to_3D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {18, 20, 21, 23, 27, 29, 30, 32}; + auto *param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 2, 3}, {1, 1, 1, 2}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, In1D_OutOfRangeBeginNegativeStride) { + float input_data[] = {1, 2, 3, 4}; + float expect_data[] = {4, 3, 2}; + auto *param = GetStridedSliceParameter({5}, {0}, {-1}); + TestMain({{{4}, input_data, Tensor::Category::VAR}}, {{3}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, tflite_cpu) { + std::vector values(32768); + for (int i = 0; i < values.size(); ++i) { + values[i] = i % 1000; + } + std::vector, std::vector, std::vector, std::vector, + std::vector, std::vector, std::vector>> + cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {3}, {1}}, + {"In1D_Int32End", {32768}, {32768}, values, values, {0}, {32768}, {1}}, + {"In1D_NegativeBegin", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {-3}, {3}, {1}}, + {"In1D_OutOfRangeBegin", {4}, {3}, {1, 2, 3, 4}, {1, 2, 3}, {-5}, {3}, {1}}, + {"In1D_NegativeEnd", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {-2}, {1}}, + {"In1D_OutOfRangeEnd", {4}, {3}, {1, 2, 3, 4}, {2, 3, 4}, {-3}, {5}, {1}}, + {"In1D_NegativeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {3}, {-2}, {-3}, {-1}}, + {"In1D_OutOfRangeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {4}, {5}, {2}, {-1}}, + {"In1D_NegativeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {3, 2}, {2}, {-4}, {-1}}, + {"In1D_OutOfRangeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {2, 1}, {-3}, {-5}, {-1}}, + {"In1D_NegStride", {3}, {3}, {1, 2, 3}, {3, 2, 1}, {-1}, {-4}, {-1}}, + {"In1D_EvenLenStride2", {2}, {1}, {1, 2}, {1}, {0}, {2}, {2}}, + {"In1D_OddLenStride2", {3}, {2}, {1, 2, 3}, {1, 3}, {0}, {3}, {2}}, + {"In2D_Identity", {2, 3}, {2, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 0}, {2, 3}, {1, 1}}, + {"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {2, 2}, {1, 1}}, + {"In2D_Stride2", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {1, 3}, {0, 0}, {2, 3}, {2, 2}}, + {"In2D_NegStride", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {2, -1}}, + {"In2D_BeginMask", {2, 3}, {2, 2}, {1, 2, 3, 4, 5, 6}, {1, 2, 4, 5}, {0, 0}, {2, 2}, {1, 1}}, + {"In2D_EndMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {4, 5, 6}, {1, 0}, {2, 3}, {1, 1}}, + {"In2D_NegStrideBeginMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {1, -1}}, + {"In2D_NegStrideEndMask", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {6, 5}, {1, -1}, {2, 0}, {1, -1}}, + {"In3D_Identity", + {2, 3, 2}, + {2, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0}, + {2, 3, 2}, + {1, 1, 1}}, + {"In3D_NegStride", + {2, 3, 2}, + {2, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, + {-1, -1, -1}, + {-3, -4, -3}, + {-1, -1, -1}}, + {"In3D_Strided2", + {2, 3, 2}, + {1, 2, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 5}, + {0, 0, 0}, + {2, 3, 2}, + {2, 2, 2}}, + {"In1D_ShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {2}, {1}}, + {"In1D_ShrinkAxisMask1_NegativeSlice", {4}, {1}, {0, 1, 2, 3}, {3}, {-1}, {4}, {1}}, + {"In2D_ShrinkAxis3_NegativeSlice", {4, 1}, {1}, {0, 1, 2, 3}, {2}, {-2, -1}, {3, 1}, {1, 1}}, + {"In2D_ShrinkAxis2_BeginEndAxis1_NegativeSlice", + {4, 1}, + {4}, + {0, 1, 2, 3}, + {0, 1, 2, 3}, + {0, -1}, + {4, 1}, + {1, 1}}, + {"In1D_BeginMaskShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {1}, {0}, {1}, {1}}, + {"In2D_ShrinkAxisMask1", {2, 3}, {3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3}, {0, 0}, {1, 3}, {1, 1}}, + {"In2D_ShrinkAxisMask2", {2, 3}, {2}, {1, 2, 3, 4, 5, 6}, {1, 4}, {0, 0}, {2, 1}, {1, 1}}, + {"In2D_ShrinkAxisMask3", {2, 3}, {1}, {1, 2, 3, 4, 5, 6}, {1}, {0, 0}, {1, 1}, {1, 1}}, + {"In3D_IdentityShrinkAxis1", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0, 0, 0}, + {1, 3, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis2", + {2, 3, 2}, + {2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 7, 8}, + {0, 0, 0}, + {2, 1, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis3", + {2, 3, 2}, + {2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2}, + {0, 0, 0}, + {1, 1, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis4", + {2, 3, 2}, + {2, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 3, 5, 7, 9, 11}, + {0, 0, 0}, + {2, 3, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis5", + {2, 3, 2}, + {3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 3, 5}, + {0, 0, 0}, + {1, 3, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis6", + {2, 3, 2}, + {2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 7}, + {0, 0, 0}, + {2, 1, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis7", + {2, 3, 2}, + {1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1}, + {0, 0, 0}, + {1, 1, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis1Uint8", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0, 0, 0}, + {1, 3, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis1int8", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0, 0, 0}, + {1, 3, 2}, + {1, 1, 1}}, + {"In5D_Identity", + {2, 2, 2, 2}, + {2, 1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {1, 2, 3, 4, 9, 10, 11, 12}, + {0, 0, 0, 0}, + {2, 1, 2, 2}, + {1, 1, 1, 1}}, + {"In5D_IdentityShrinkAxis1", + {2, 2, 2, 2}, + {1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {1, 2, 3, 4}, + {0, 0, 0, 0}, + {1, 1, 2, 2}, + {1, 1, 1, 1}}, + {"In3D_SmallBegin", + {2, 3, 2}, + {1, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0}, + {1}, + {1}}, + {"In3D_SmallBeginWithhrinkAxis1", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0}, + {1}, + {1}}}; + + for (auto &case_ : cases) { + auto &name = std::get<0>(case_); + auto &input_shape = std::get<1>(case_); + auto &output_shape = std::get<2>(case_); + auto &input_data = std::get<3>(case_); + auto &expect_data = std::get<4>(case_); + auto &begin = std::get<5>(case_); + auto &end = std::get<6>(case_); + auto &stride = std::get<7>(case_); + + std::cout << name << std::endl; + auto 
*param = GetStridedSliceParameter(begin, end, stride); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + false); + param = GetStridedSliceParameter(begin, end, stride); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + true); + } +} + +TEST_F(TestStridedSliceOpenCL, tflite_opencl) { + float input_data[] = {0.1f, 0.2f, 0.3f, 0.4, 1.1f, 1.2f, 1.3f, 1.4, 10.1f, 10.2f, 10.3f, 10.4, + 11.1f, 11.2f, 11.3f, 11.4, 20.1f, 20.2f, 20.3f, 20.4, 21.1f, 21.2f, 21.3f, 21.4}; + float expect_data[] = {10.2, 10.4, 20.2, 20.4}; + auto *param = GetStridedSliceParameter({0, 1, 0, 1}, {1, 3, 2, 4}, {1, 1, 2, 2}); + TestMain({{{1, 3, 2, 4}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); +} + +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc index 1bdc24028e..1aa68f4363 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc @@ -15,11 +15,17 @@ */ #include -#include "src/common/log_adapter.h" -#include "mindspore/lite/src/common/file_utils.h" +#include "common/common_test.h" +#include "src/kernel_registry.h" #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" #include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +using mindspore::kernel::LiteKernel; +using mindspore::kernel::SubGraphOpenCLKernel; +using mindspore::lite::KernelRegistry; +using mindspore::lite::Tensor; +using mindspore::schema::Format::Format_NHWC; + namespace mindspore { void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) { @@ -35,4 +41,80 @@ void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) { } } +void TestMain(const std::vector, float *, Tensor::Category>> &input_infos, + std::tuple, float *> output_info, OpParameter *op_parameter, bool fp16_enable, + float atol, bool print_output) { + MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator"; + auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); + auto ocl_runtime = runtime_wrapper.GetInstance(); + EXPECT_TRUE(ocl_runtime->Init() == RET_OK); + ocl_runtime->SetFp16Enable(fp16_enable); + auto allocator = ocl_runtime->GetAllocator(); + + MS_LOG(DEBUG) << "create Tensors & init weight data"; + std::vector tensors; + std::vector kernel_inputs; + std::vector subgraph_inputs; + std::map subgraph_inputs_data; + for (auto input_info : input_infos) { + const std::vector &shape = std::get<0>(input_info); + auto *input_data = std::get<1>(input_info); + const Tensor::Category category = std::get<2>(input_info); + tensors.emplace_back(kNumberTypeFloat32, shape, Format_NHWC, category); + auto *new_tensor = &tensors.back(); + kernel_inputs.push_back(new_tensor); + if (category != Tensor::Category::VAR) { + memcpy(new_tensor->MutableData(), input_data, new_tensor->Size()); + } else { + subgraph_inputs.push_back(new_tensor); + subgraph_inputs_data[new_tensor] = input_data; + } + } + const std::vector &output_shape = std::get<0>(output_info); + float *expect_data = std::get<1>(output_info); + auto output = Tensor(kNumberTypeFloat32, output_shape, Format_NHWC, Tensor::Category::VAR); + + MS_LOG(DEBUG) << "create OpenCL Kernel"; + auto primitive_type = static_cast(op_parameter->type_); + kernel::KernelKey key{kernel::kGPU, 
kernel_inputs.front()->data_type(), primitive_type}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + if (creator == nullptr) { + std::cerr << "get kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; + free(op_parameter); + FAIL(); + } + auto *kernel = creator(kernel_inputs, {&output}, op_parameter, nullptr, key, nullptr); + if (kernel == nullptr) { + std::cerr << "call kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; + free(op_parameter); + FAIL(); + } + + MS_LOG(DEBUG) << "create SubGraph & init input data"; + std::vector kernels{kernel}; + auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels); + if (sub_graph == nullptr) { + return; + } + for (auto input : subgraph_inputs) { + EXPECT_TRUE(input->MallocData(allocator) == RET_OK); + } + EXPECT_TRUE(sub_graph->Init() == RET_OK); + for (auto input : subgraph_inputs) { + memcpy(input->data_c(), subgraph_inputs_data[input], input->Size()); + } + + MS_LOG(DEBUG) << "run SubGraph & compare result"; + EXPECT_TRUE(sub_graph->Run() == RET_OK); + if (print_output) { + for (int i = 0; i < output.ElementsNum(); ++i) { + printf("%d: expect=%.3f output=%.3f\n", i, expect_data[i], reinterpret_cast(output.data_c())[i]); + } + } + CommonTest::CompareOutputData(reinterpret_cast(output.data_c()), expect_data, output.ElementsNum(), atol); + + MS_LOG(DEBUG) << "release resources"; + delete sub_graph; +} + } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h index dd4fb08958..dadcbd00a9 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h @@ -14,16 +14,18 @@ * limitations under the License. */ +#ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ +#define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ + #include #include -#include "tests/ut/cpp/common/common_test.h" -#include "src/common/log_adapter.h" +#include +#include +#include +#include "mindspore/lite/src/tensor.h" #include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ -#define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ +using mindspore::lite::Tensor; namespace mindspore { @@ -63,6 +65,10 @@ void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol); } +void TestMain(const std::vector, float *, Tensor::Category>> &input_infos, + std::tuple, float *> output_info, OpParameter *op_parameter, bool fp16_enable = false, + float atol = 10e-9, bool print_output = false); + } // namespace mindspore #endif // TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_