diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl index 72a20cd293..50492b36d9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl @@ -2,8 +2,8 @@ #define INT2 int2 #define INT4 int4 __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; -__kernel void slice(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape, - INT4 begin, INT2 sharedNoUpdiv) { +__kernel void slice_NHWC4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape, + INT4 begin, INT2 sharedNoUpdiv) { int X = get_global_id(1); // H int Y = get_global_id(2); // W if (X >= out_shape.y || Y >= out_shape.z) { @@ -16,7 +16,7 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output, WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i, (X)), result); } } else { - int begin_postion = sharedNoUpdiv.y % 4; + int begin_postion = sharedNoUpdiv.x % 4; FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + begin.w, (X + begin.y))); if (begin_postion == 1) { for (int i = 1; i <= out_shape.w; i++) { @@ -76,3 +76,71 @@ __kernel void slice(__read_only image2d_t input, __write_only image2d_t output, WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0); } } +__kernel void slice_NC4HW4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape, + INT4 begin, INT2 sharedNoUpdiv) { + int X = get_global_id(1); // H + int Y = get_global_id(2); // W + if (X >= out_shape.y || Y >= out_shape.z) { + return; + } + FLT4 result; + if (sharedNoUpdiv.x % 4 == 0) { + for (int i = 0; i < out_shape.w; i++) { + result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); + WRITE_IMAGE(output, (INT2)((Y), (i * out_shape.y + X)), result); + } + } else { + int begin_postion = sharedNoUpdiv.x % 4; + FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (begin.w) * input_shape.y + (X + begin.y))); + if (begin_postion == 1) { + for (int i = 1; i <= out_shape.w; i++) { + FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); + result.x = first.y; + result.y = first.z; + result.z = first.w; + result.w = second.x; + WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); + first.y = second.y; + first.z = second.z; + first.w = second.w; + } + } else if (begin_postion == 2) { + for (int i = 1; i <= out_shape.w; i++) { + FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); + result.x = first.z; + result.y = first.w; + result.z = second.x; + result.w = second.y; + WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); + first.z = second.z; + first.w = second.w; + } + } else { + for (int i = 1; i <= out_shape.w; i++) { + FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); + result.x = first.w; + result.y = second.x; + result.z = second.y; + result.w = second.z; + WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); + first.w = second.w; + } + } + } + // judge the line of size + int size = sharedNoUpdiv.y % 4; + FLT4 result_fill0 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); + if (size == 1) { + result_fill0.x = result.x; + WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); + } else if (size == 2) { + result_fill0.x = result.x; + result_fill0.y = result.y; + WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); + } else if (size == 3) { + result_fill0.x = result.x; + result_fill0.y = result.y; + result_fill0.z = result.z; + WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); + } +} diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc index 6e8c76d84d..5d29e1aa93 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc @@ -35,7 +35,7 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { im_dst_x = out_tensors_[0]->Width() * CO4; im_dst_y = out_tensors_[0]->Height(); } else { - im_dst_y = out_tensors_[0]->Height() * CO4; + im_dst_y = out_tensors_[0]->Batch() * out_tensors_[0]->Height() * CO4; im_dst_x = out_tensors_[0]->Width(); } size_t img_dtype = CL_FLOAT; @@ -50,18 +50,28 @@ int SliceOpenCLKernel::GetImageSize(size_t idx, std::vector *img_size) { return RET_OK; } int SliceOpenCLKernel::Init() { + std::string kernel_name = "slice"; + auto in_format = op_format_; + if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { + MS_LOG(ERROR) << "input format(" << in_format << ") " + << "format not support!"; + return RET_ERROR; + } + in_ori_format_ = in_tensors_[0]->GetFormat(); + in_tensors_[0]->SetFormat(op_format_); + out_ori_format_ = out_tensors_[0]->GetFormat(); + out_tensors_[0]->SetFormat(op_format_); + if (in_format == schema::Format_NC4HW4) { + kernel_name += "_NC4HW4"; + } else if (in_format == schema::Format_NHWC4) { + kernel_name += "_NHWC4"; + } std::set build_options; std::string source = slice_source; std::string program_name = "slice"; - std::string kernel_name = "slice"; auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); ocl_runtime->LoadSource(program_name, source); ocl_runtime->BuildKernel(kernel_, program_name, kernel_name, build_options); - in_ori_format_ = in_tensors_[0]->GetFormat(); - in_tensors_[0]->SetFormat(schema::Format_NHWC4); - out_ori_format_ = out_tensors_[0]->GetFormat(); - out_tensors_[0]->SetFormat(schema::Format_NHWC4); - return RET_OK; } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc index 3faefcd935..bef522530b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc @@ -49,7 +49,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { MS_LOG(INFO) << " Read tensors from .bin "; std::vector input_shape = {1, 19, 19, 96}; std::vector output_shape = {1, 10, 10, 13}; - std::vector begin = {0, 2, 3, 4}; + std::vector begin = {0, 2, 3, 3}; std::vector size = {1, 10, 10, 13}; auto data_type = kNumberTypeFloat32; auto tensor_type = schema::NodeType_ValueNode; @@ -68,7 +68,7 @@ TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { return; } auto *output_tensor = - new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); + new (std::nothrow) lite::tensor::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); if (output_tensor == nullptr) { delete tensor_data; MS_LOG(INFO) << " init tensor failed ";