diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl deleted file mode 100644 index 50492b36d9..0000000000 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/slice.cl +++ /dev/null @@ -1,146 +0,0 @@ -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -#define INT2 int2 -#define INT4 int4 -__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; -__kernel void slice_NHWC4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape, - INT4 begin, INT2 sharedNoUpdiv) { - int X = get_global_id(1); // H - int Y = get_global_id(2); // W - if (X >= out_shape.y || Y >= out_shape.z) { - return; - } - FLT4 result; - if (sharedNoUpdiv.x % 4 == 0) { - for (int i = 0; i < out_shape.w; i++) { - result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (i + begin.w), (X + begin.y))); - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i, (X)), result); - } - } else { - int begin_postion = sharedNoUpdiv.x % 4; - FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + begin.w, (X + begin.y))); - if (begin_postion == 1) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y))); - result.x = first.y; - result.y = first.z; - result.z = first.w; - result.w = second.x; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result); - first.y = second.y; - first.z = second.z; - first.w = second.w; - } - } else if (begin_postion == 2) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y))); - result.x = first.z; - result.y = first.w; - result.z = second.x; - result.w = second.y; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result); - first.z = second.z; - first.w = second.w; - } - } else { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z) * input_shape.w + (begin.w + i), (X + begin.y))); - result.x = first.w; - result.y = second.x; - result.z = second.y; - result.w = second.z; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + i - 1, (X)), result); - first.w = second.w; - } - } - } - // judge the line of size - int size = sharedNoUpdiv.y % 4; - FLT4 result_fill0; - if (size == 1) { - result_fill0.x = result.x; - result_fill0.y = 0; - result_fill0.z = 0; - result_fill0.w = 0; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0); - } else if (size == 2) { - result_fill0.x = result.x; - result_fill0.y = result.y; - result_fill0.z = 0; - result_fill0.w = 0; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0); - } else if (size == 3) { - result_fill0.x = result.x; - result_fill0.y = result.y; - result_fill0.z = result.z; - result_fill0.w = 0; - WRITE_IMAGE(output, (INT2)((Y)*out_shape.w + out_shape.w - 1, (X)), result_fill0); - } -} -__kernel void slice_NC4HW4(__read_only image2d_t input, __write_only image2d_t output, INT4 input_shape, INT4 out_shape, - INT4 begin, INT2 sharedNoUpdiv) { - int X = get_global_id(1); // H - int Y = get_global_id(2); // W - if (X >= out_shape.y || Y >= out_shape.z) { - return; - } - FLT4 result; - if (sharedNoUpdiv.x % 4 == 0) { - for (int i = 0; i < out_shape.w; i++) { - result = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - 
WRITE_IMAGE(output, (INT2)((Y), (i * out_shape.y + X)), result); - } - } else { - int begin_postion = sharedNoUpdiv.x % 4; - FLT4 first = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (begin.w) * input_shape.y + (X + begin.y))); - if (begin_postion == 1) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - result.x = first.y; - result.y = first.z; - result.z = first.w; - result.w = second.x; - WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); - first.y = second.y; - first.z = second.z; - first.w = second.w; - } - } else if (begin_postion == 2) { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - result.x = first.z; - result.y = first.w; - result.z = second.x; - result.w = second.y; - WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); - first.z = second.z; - first.w = second.w; - } - } else { - for (int i = 1; i <= out_shape.w; i++) { - FLT4 second = READ_IMAGE(input, smp_none, (INT2)((Y + begin.z), (i + begin.w) * input_shape.y + (X + begin.y))); - result.x = first.w; - result.y = second.x; - result.z = second.y; - result.w = second.z; - WRITE_IMAGE(output, (INT2)((Y), ((i - 1) * out_shape.y + X)), result); - first.w = second.w; - } - } - } - // judge the line of size - int size = sharedNoUpdiv.y % 4; - FLT4 result_fill0 = (FLT4)(0.0f, 0.0f, 0.0f, 0.0f); - if (size == 1) { - result_fill0.x = result.x; - WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); - } else if (size == 2) { - result_fill0.x = result.x; - result_fill0.y = result.y; - WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); - } else if (size == 3) { - result_fill0.x = result.x; - result_fill0.y = result.y; - result_fill0.z = result.z; - WRITE_IMAGE(output, (INT2)((Y), ((out_shape.w - 1) * out_shape.y + X)), result_fill0); - } -} diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl new file mode 100644 index 0000000000..b1f0c24aae --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/opencl/cl/strided_slice.cl @@ -0,0 +1,59 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +__constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + +__kernel void strided_slice(__read_only image2d_t input, __write_only image2d_t output, int4 input_shape, + int4 output_shape, int2 io_slices, int4 begin, int4 stride, int4 size) { + int IN = input_shape.x, IH = input_shape.y, IW = input_shape.z, CI = input_shape.w; + int ON = output_shape.x, OH = output_shape.y, OW = output_shape.z, CO = output_shape.w; + int CI_SLICES = io_slices.x, CO_SLICES = io_slices.y; + int on_oh = get_global_id(0); + int ow = get_global_id(1); + int co_slice = get_global_id(2); + int on = on_oh / OH; + int oh = on_oh % OH; + if (on >= ON || oh >= OH || ow >= OW || co_slice >= CO_SLICES) { + return; + } + + FLT tmp[4]; + for (int i = 0; i < 4; ++i) { + // output_shape idx -> size idx. 
because squeeze(output_shape)=squeeze(size) + // for example: + // python code: B = A[1, 1:16, 2:16, 3:16] + // input_shape = [16, 16, 16, 16] + // begin = [ 1, 1, 2, 3] + // end = [ 2, 16, 16, 16] + // stride = [ 1, 1, 1, 1] + // size = [ 1, 15, 14, 13] = ceil((end - begin) / stride) + // output_shape = [ 15, 14, 13] + int idx = ((on * OH + oh) * OW + ow) * CO + co_slice * 4 + i; + int co_ = idx % size.w; + idx /= size.w; + int ow_ = idx % size.z; + idx /= size.z; + int oh_ = idx % size.y; + idx /= size.y; + int on_ = idx; + + int in = begin.x + stride.x * on_; + int ih = begin.y + stride.y * oh_; + int iw = begin.z + stride.z * ow_; + int ci = begin.w + stride.w * co_; + + FLT4 src = READ_IMAGE(input, smp_none, (int2)(iw * CI_SLICES + ci / 4, in * IH + ih)); + int offset = ci % 4; + if (offset == 0) { + tmp[i] = src.x; + } else if (offset == 1) { + tmp[i] = src.y; + } else if (offset == 2) { + tmp[i] = src.z; + } else { + tmp[i] = src.w; + } + } + + FLT4 out = (FLT4)(tmp[0], tmp[1], tmp[2], tmp[3]); + WRITE_IMAGE(output, (int2)(ow * CO_SLICES + co_slice, on_oh), out); +} diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc deleted file mode 100644 index c8d6b009dc..0000000000 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.cc +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Copyright 2019 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include "src/kernel_registry.h" -#include "src/runtime/kernel/opencl/kernel/slice.h" -#include "src/runtime/kernel/opencl/utils.h" -#include "src/runtime/kernel/opencl/cl/slice.cl.inc" - -using mindspore::kernel::KERNEL_ARCH::kGPU; -using mindspore::lite::KernelRegistrar; -using mindspore::lite::RET_ERROR; -using mindspore::lite::RET_OK; -using mindspore::schema::PrimitiveType_Slice; - -namespace mindspore::kernel { - -int SliceOpenCLKernel::Init() { - std::set build_options; - std::string source = slice_source; - std::string program_name = "slice"; - std::string kernel_name = "slice_NHWC4"; - ocl_runtime_->LoadSource(program_name, source); - ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options); - MS_LOG(DEBUG) << kernel_name << " Init Done!"; - return RET_OK; -} - -void SlcieGetWorkGroup(const std::vector &global, std::vector *local, int max_size) { - const int max_divider = 8; - const int max_x = 4, max_y = 8; - int x = std::min(GetMaxDivisorStrategy1(global[0], max_divider), max_x); - int yz = max_size / x; - int y = std::min(std::min(GetMaxDivisorStrategy1(global[1], max_divider), yz), max_y); - int z = std::min(yz / y, static_cast(UP_DIV(global[2], 2))); - - local->clear(); - local->push_back(x); - local->push_back(y); - local->push_back(z); -} - -int SliceOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running! 
"; - auto param = reinterpret_cast(this->op_parameter_); - auto input_shape = in_tensors_[0]->shape(); - cl_int4 input_shape_ = {input_shape[0], input_shape[1], input_shape[2], UP_DIV(input_shape[3], C4NUM)}; - cl_int4 size_ = {param->size_[0], param->size_[1], param->size_[2], UP_DIV(param->size_[3], C4NUM)}; - cl_int4 begin_ = {param->begin_[0], param->begin_[1], param->begin_[2], param->begin_[3] / 4}; - cl_int2 sharedNoUpdiv = {param->begin_[3], param->size_[3]}; - uint32_t OH = param->size_[1]; - uint32_t OW = param->size_[2]; - - const std::vector &max_global = ocl_runtime_->GetWorkItemSize(); - std::vector local = {1, 1, 1}; // init local - std::vector global = {1, OH, OW}; - SlcieGetWorkGroup(global, &local, max_global[0]); - int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); // out tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, size_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, sharedNoUpdiv); - ocl_runtime_->RunKernel(kernel_, global, local, nullptr); - - return RET_OK; -} - -kernel::LiteKernel *OpenCLSliceKernelCreator(const std::vector &inputs, - const std::vector &outputs, OpParameter *opParameter, - const lite::InnerContext *ctx, const kernel::KernelKey &desc, - const mindspore::lite::PrimitiveC *primitive) { - auto *kernel = new (std::nothrow) SliceOpenCLKernel(opParameter, inputs, outputs); - if (kernel == nullptr) { - MS_LOG(ERROR) << " new SliceOpenCLKernel failed "; - free(opParameter); - return nullptr; - } - auto ret = kernel->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << " Init kernel failed, name: Slice "; - delete kernel; - return nullptr; - } - return kernel; -} - -REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLSliceKernelCreator); -REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLSliceKernelCreator); -} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc new file mode 100644 index 0000000000..2c0f1e3efb --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -0,0 +1,192 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include +#include "src/kernel_registry.h" +#include "src/runtime/kernel/opencl/kernel/strided_slice.h" +#include "src/runtime/kernel/opencl/utils.h" +#include "src/runtime/kernel/opencl/cl/strided_slice.cl.inc" +#include "nnacl/strided_slice.h" + +using mindspore::kernel::KERNEL_ARCH::kGPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Slice; +using mindspore::schema::PrimitiveType_StridedSlice; + +namespace mindspore::kernel { + +int SliceOpenCLKernel::CheckSpecs() { + const std::string kernel_name = op_parameter_->type_ == PrimitiveType_Slice ? "Slice" : "StridedSlice"; + if (in_tensors_.size() != 1) { + MS_LOG(ERROR) << kernel_name + " only supports 1 input Tensor."; + return RET_ERROR; + } + if (out_tensors_.size() != 1) { + MS_LOG(ERROR) << kernel_name + " only supports 1 output Tensor."; + return RET_ERROR; + } + auto in_ndim = in_tensors_.front()->shape().size(); + if (in_ndim == 0 || in_ndim > 4) { + MS_LOG(ERROR) << kernel_name + " only supports 1D-4D input tensor"; + return RET_ERROR; + } + auto out_ndim = out_tensors_.front()->shape().size(); + if (out_ndim > 4) { + MS_LOG(ERROR) << kernel_name + " only supports 0D-4D output tensor"; + return RET_ERROR; + } + if (InitConstArgs() != RET_OK) { + MS_LOG(ERROR) << "call SliceOpenCLKernel::InitConstArgs() failed"; + return RET_ERROR; + } + return RET_OK; +} + +int SliceOpenCLKernel::Prepare() { + std::set build_options; + std::string program_name = "strided_slice"; + ocl_runtime_->LoadSource(program_name, strided_slice_source); + ocl_runtime_->BuildKernel(kernel_, program_name, "strided_slice", build_options); + SetConstArgs(); + SetGlobalLocal(); + return RET_OK; +} + +int SliceOpenCLKernel::InitConstArgs() { + auto input_info = Image2DInfo(in_tensors_.front()); + auto output_info = Image2DInfo(out_tensors_.front()); + input_shape_ = {static_cast(input_info.N), static_cast(input_info.H), + static_cast(input_info.W), static_cast(input_info.C)}; + output_shape_ = {static_cast(output_info.N), static_cast(output_info.H), + static_cast(output_info.W), static_cast(output_info.C)}; + io_slices_ = {static_cast(input_info.Slice), static_cast(output_info.Slice)}; + + if (op_parameter_->type_ == PrimitiveType_Slice) { + auto param = reinterpret_cast(op_parameter_); + Broadcast2GpuShape(param->begin_, begin_.s, param->param_length_, 0); + Broadcast2GpuShape(param->size_, size_.s, param->param_length_, -1); + for (int i = 0; i < 4; ++i) { + if (begin_.s[i] < 0) { + begin_.s[i] += input_shape_.s[i]; + } + if (begin_.s[i] < 0 || begin_.s[i] >= input_shape_.s[i]) { + MS_LOG(ERROR) << "Slice kernel only supports 0<=begin0 but size[i]=" << size_.s[i]; + return RET_ERROR; + } + if (size_.s[i] == -1 || begin_.s[i] + size_.s[i] > input_shape_.s[i]) { + size_.s[i] = input_shape_.s[i] - begin_.s[i]; + } + } + } else { + auto param = reinterpret_cast(op_parameter_); + cl_int4 end = input_shape_; + Broadcast2GpuShape(param->begins_, begin_.s, param->num_axes_, 0); + Broadcast2GpuShape(param->strides_, stride_.s, param->num_axes_, 1); + Broadcast2GpuShape(param->ends_, end.s, param->num_axes_); + + for (int i = 0; i < 4; ++i) { + // begin is negative + if (begin_.s[i] < 0) { + begin_.s[i] += input_shape_.s[i]; + } + // avoid begin is out of range + begin_.s[i] = std::clamp(begin_.s[i], 0, input_shape_.s[i] - 1); + // end is negative + if (end.s[i] < 0) { + end.s[i] += input_shape_.s[i]; + } + // avoid end is out 
of range + end.s[i] = std::clamp(end.s[i], -1, input_shape_.s[i]); + + // check stride begin end + if (stride_.s[i] > 0) { + if (begin_.s[i] >= end.s[i]) { + MS_LOG(ERROR) << "StridedSlice kernel only supports begin_0"; + return RET_ERROR; + } + } else if (stride_.s[i] < 0) { + if (begin_.s[i] <= end.s[i]) { + MS_LOG(ERROR) << "StridedSlice kernel only supports begin_>end when stride<0"; + return RET_ERROR; + } + } else { + MS_LOG(ERROR) << "StridedSlice kernel only supports stride!=0"; + return RET_ERROR; + } + size_.s[i] = std::ceil(static_cast(end.s[i] - begin_.s[i]) / static_cast(stride_.s[i])); + } + } + + // check size + std::vector shape_not_1; + std::vector size_not_1; + std::copy_if(out_tensors_.front()->shape().begin(), out_tensors_.front()->shape().end(), shape_not_1.begin(), + [](int x) { return x > 1; }); + std::copy_if(size_.s, size_.s + 4, size_not_1.begin(), [](int x) { return x > 1; }); + if (shape_not_1 != size_not_1) { + MS_LOG(ERROR) << "Slice/StridedSlice kernel output shape infer error"; + return RET_ERROR; + } + return RET_OK; +} + +void SliceOpenCLKernel::SetConstArgs() { + int arg_cn = 2; + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_); + ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_); +} + +void SliceOpenCLKernel::SetGlobalLocal() { + auto output_info = Image2DInfo(out_tensors_.front()); + std::vector global = {output_info.N * output_info.H, output_info.W, output_info.Slice}; + + const int max_divider = 8; + auto max_work_group_size = ocl_runtime_->DeviceMaxWorkGroupSize(); + size_t local_c = GetMaxDivisorStrategy0(global[2], max_divider); + size_t local_hw = max_work_group_size / local_c; + size_t local_h = std::min(UP_DIV(global[0], 2), local_hw); + size_t local_w = std::min(local_hw / local_h, global[1]); + std::vector local = {local_h, local_w, local_c}; + AlignGlobalLocal(global, local); +} + +int SliceOpenCLKernel::Run() { + MS_LOG(DEBUG) << this->name() << " Running! "; + ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); + ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); + ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr); + return RET_OK; +} + +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Slice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_Slice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, OpenCLKernelCreator); +REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, OpenCLKernelCreator); +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h similarity index 72% rename from mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h rename to mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h index 44fb3871d6..f29c7f19b2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_ -#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_SLICE_H_ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_OPENCL_KERNEL_STRIDED_SLICE_H_ #include #include "src/runtime/kernel/opencl/opencl_kernel.h" @@ -31,12 +31,23 @@ class SliceOpenCLKernel : public OpenCLKernel { ~SliceOpenCLKernel() override = default; - int Init() override; - + int Prepare() override; int Run() override; + int CheckSpecs() override; + void SetConstArgs() override; + void SetGlobalLocal() override; + private: + int InitConstArgs(); + cl::Kernel kernel_; + cl_int4 input_shape_{}; + cl_int4 output_shape_{}; + cl_int2 io_slices_{}; + cl_int4 begin_{}; + cl_int4 stride_{{1, 1, 1, 1}}; + cl_int4 size_{}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 8ab9120485..8421cb5269 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -34,33 +34,67 @@ struct OpenCLToFormatParameter { lite::opencl::MemType out_mem_type{lite::opencl::MemType::IMG}; }; +template +void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num) { + auto *N = dst; + auto *H = dst + 1; + auto *W = dst + 2; + auto *C = dst + 3; + if (src_num == 1) { + *N = src[0]; + } else if (src_num == 2) { + *N = src[0]; + *C = src[1]; + } else if (src_num == 3) { + *N = src[0]; + *W = src[1]; + *C = src[2]; + } else if (src_num == 4) { + *N = src[0]; + *H = src[1]; + *W = src[2]; + *C = src[3]; + } else if (src_num >= 5) { + MS_LOG(ERROR) << "GPU doesn't support ndim>=" << src_num; + } +} + +template +void Broadcast2GpuShape(const SrcT *src, DstT *dst, int src_num, DstT default_value) { + for (int i = 0; i < 4; ++i) { + dst[i] = default_value; + } + Broadcast2GpuShape(src, dst, src_num); +} + struct Image2DInfo { explicit Image2DInfo(const lite::Tensor *tensor) { if (tensor == nullptr) { return; } - auto shape = tensor->shape(); - if (shape.size() == 1) { + auto ndim = shape.size(); + if (ndim == 1) { N = shape[0]; - } else if (shape.size() == 2) { + } else if (ndim == 2) { N = shape[0]; C = shape[1]; - } else if (shape.size() == 3) { + } else if (ndim == 3) { N = shape[0]; W = shape[1]; C = shape[2]; - } else if (shape.size() == 4) { + } else if (ndim == 4) { N = shape[0]; H = shape[1]; W = shape[2]; C = shape[3]; - } else if (shape.size() >= 5) { - MS_LOG(ERROR) << "GPU dont't support Tensor with dim=" << shape.size(); + } else if (ndim >= 5) { + MS_LOG(ERROR) << "GPU doesn't support Tensor with ndim>=" << ndim; } + Slice = UP_DIV(C, C4NUM); + FLT_size = tensor->data_type() == kNumberTypeFloat16 ? sizeof(cl_half) : sizeof(cl_float); FLT4_size = FLT_size * 4; - Slice = UP_DIV(C, C4NUM); if (W * Slice <= MAX_IMAGE2D_SIZE) { height = N * H; width = W * Slice; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc index 454a319841..692ee9b073 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/slice_tests.cc @@ -13,357 +13,151 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include -#include -#include "src/common/log_adapter.h" #include "common/common_test.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#include "mindspore/lite/src/runtime/kernel/opencl/kernel/slice.h" +#include "nnacl/slice_parameter.h" +#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" namespace mindspore { -class TestSliceOpenCLfp32 : public mindspore::CommonTest { - public: - TestSliceOpenCLfp32() {} -}; -class TestSliceOpenCLfp16 : public mindspore::CommonTest { - public: - TestSliceOpenCLfp16() {} -}; -template -void CompareOutputData1(T *output_data, T *correct_data, int size, float err_bound) { - for (size_t i = 0; i < size; i++) { - T abs = fabs(output_data[i] - correct_data[i]); - ASSERT_LE(abs, err_bound); - } -} - -TEST_F(TestSliceOpenCLfp32, Slicefp32CI) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 2, 2, 8}; - std::vector output_shape = {1, 2, 2, 5}; - std::vector begin = {0, 0, 0, 2}; - std::vector size = {1, 2, 2, 5}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, - 0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498, - -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366, - -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552, - 0.95056856, -0.06886655, 0.5316237, 0.05787678}; - float correct_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, - 0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151, - -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237}; - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - delete tensor_data; - MS_LOG(INFO) << " init tensor failed "; - return; - } - std::vector inputs = {tensor_data}; - std::vector outputs = {output_tensor}; +class TestSliceOpenCL : public mindspore::CommonTest {}; - MS_LOG(INFO) << "setting SliceParameter "; - auto param = reinterpret_cast(malloc(sizeof(SliceParameter))); +OpParameter *GetSliceParameter(const std::vector &begin, const std::vector &size) { + auto param = static_cast(malloc(sizeof(SliceParameter))); if (param == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - MS_LOG(INFO) << "new SliceParameter failed "; - return; + MS_LOG(ERROR) << "SliceParameter create error."; + return nullptr; } - for (int i = 0; i < input_shape.size(); i++) { + param->op_parameter_.type_ = schema::PrimitiveType_Slice; + param->param_length_ = begin.size(); + for (int i = 0; i < begin.size(); ++i) { param->begin_[i] = begin[i]; param->size_[i] = size[i]; } - - auto *slice_kernel = - new (std::nothrow) 
kernel::SliceOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (slice_kernel == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - MS_LOG(INFO) << "new kernel::slice_kernel failed "; - return; - } - slice_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{slice_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete slice_kernel; - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, sizeof(input_data)); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; + return reinterpret_cast(param); } -TEST_F(TestSliceOpenCLfp32, Slicefp32input_dim4) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 19, 19, 96}; - std::vector output_shape = {1, 10, 10, 13}; - std::vector begin = {0, 2, 3, 4}; - std::vector size = {1, 10, 10, 13}; - auto data_type = kNumberTypeFloat32; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - // get the input from .bin - size_t input_size, output_size; - std::string input_path = "./test_data/in_slicefp32.bin"; - std::string output_path = "./test_data/out_slicefp32.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC, tensor_type); - if (output_tensor == nullptr) { - delete tensor_data; - MS_LOG(INFO) << " init tensor failed "; - return; - } - std::vector inputs = {tensor_data}; - std::vector outputs = {output_tensor}; - - MS_LOG(INFO) << "setting SliceParameter "; - auto param = reinterpret_cast(malloc(sizeof(SliceParameter))); - if (param == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - MS_LOG(INFO) << "new SliceParameter failed "; - return; - } - for (int i = 0; i < input_shape.size(); i++) { - param->begin_[i] = begin[i]; - param->size_[i] = size[i]; - } - - auto *slice_kernel = - new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (slice_kernel == nullptr) { - for (auto tensor 
: inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - MS_LOG(INFO) << "new kernel::slice_kernel failed "; - return; - } - slice_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{slice_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete slice_kernel; - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, input_size); - - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; +TEST_F(TestSliceOpenCL, 4D) { + float input_data[] = {-0.45816937, 0.92391545, -0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, + 0.09470133, 0.19801073, 0.04927751, -1.2808367, 0.1470597, 0.03393711, -0.33282498, + -1.0433807, -1.3678077, -0.6423931, 0.5584889, 0.28965706, 0.5343769, 0.75480366, + -1.9328151, -0.48714373, 1.711132, -1.8871949, -0.2987629, -0.14000037, -0.080552, + 0.95056856, -0.06886655, 0.5316237, 0.05787678}; + float expect_data[] = {-0.9135602, -1.4002057, 1.1080881, 0.40712625, -0.28128958, -1.2808367, 0.1470597, + 0.03393711, -0.33282498, -1.0433807, 0.28965706, 0.5343769, 0.75480366, -1.9328151, + -0.48714373, -0.14000037, -0.080552, 0.95056856, -0.06886655, 0.5316237}; + auto param = GetSliceParameter({0, 0, 0, 2}, {1, 2, 2, 5}); + TestMain({{{1, 2, 2, 8}, input_data, Tensor::Category::VAR}}, {{1, 2, 2, 5}, expect_data}, param, false); } -TEST_F(TestSliceOpenCLfp16, Slicefp16input_dim4) { - MS_LOG(INFO) << " begin test "; - auto ocl_runtime = lite::opencl::OpenCLRuntimeWrapper().GetInstance(); - ocl_runtime->SetFp16Enable(true); - ocl_runtime->Init(); - auto allocator = ocl_runtime->GetAllocator(); - - MS_LOG(INFO) << " Read tensors from .bin "; - std::vector input_shape = {1, 25, 25, 48}; - std::vector output_shape = {1, 24, 24, 15}; - std::vector begin = {0, 1, 1, 7}; - std::vector size = {1, 24, 24, 15}; - auto data_type = kNumberTypeFloat16; - auto tensor_type = lite::Tensor::CONST_TENSOR; - - // get the input from .bin - size_t input_size, output_size; - std::string input_path = "./test_data/in_slicefp16.bin"; - std::string output_path = "./test_data/out_slicefp16.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - auto correct_data = reinterpret_cast(mindspore::lite::ReadFile(output_path.c_str(), &output_size)); - - MS_LOG(INFO) << " construct tensors "; - lite::Tensor *tensor_data = new (std::nothrow) lite::Tensor(data_type, input_shape, schema::Format_NHWC, tensor_type); - if (tensor_data == nullptr) { - MS_LOG(INFO) << " init tensor failed "; - return; - } - auto *output_tensor = new (std::nothrow) lite::Tensor(data_type, output_shape, schema::Format_NHWC4, tensor_type); - if 
(output_tensor == nullptr) { - delete tensor_data; - MS_LOG(INFO) << " init tensor failed "; - return; +TEST_F(TestSliceOpenCL, tflite_cpu) { + std::vector, std::vector, std::vector, std::vector, + std::vector, std::vector>> + cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {2}}, + {"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {1, 2}}, + {"In3D", + {2, 3, 2}, + {2, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0}, + {2, 3, 2}}, + {"InputFloat", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}}, + {"IndexInt64", {4, 1, 1, 1}, {3, 1, 1, 1}, {1, 2, 3, 4}, {2, 3, 4}, {1, 0, 0, 0}, {3, 1, 1, 1}}, + {"InputInteger1", + {3, 2, 3, 1}, + {1, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3}, + {1, 0, 0, 0}, + {1, 1, 3, 1}}, + {"InputInteger2", + {3, 2, 3, 1}, + {1, 2, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 4, 4, 4}, + {1, 0, 0, 0}, + {1, 2, 3, 1}}, + {"InputInteger3", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, 3, 1}}, + {"SizeMinus1", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"BeginNonZeroSizeMinus1Axis1", + {3, 3, 2, 1}, + {2, 2, 1, 1}, + {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, + {5, 6, 8, 9}, + {1, 1, 0, 0}, + {2, -1, 1, 1}}, + {"BeginNonZeroSizeMinus1Axis2", + {3, 2, 3, 1}, + {2, 1, 2, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 5, 5}, + {1, 0, 1, 0}, + {2, 1, -1, 1}}, + {"BeginNonZeroSizeMinus1Axis3", + {3, 1, 2, 3}, + {2, 1, 1, 2}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 5, 5}, + {1, 0, 0, 1}, + {2, 1, 1, -1}}, + {"SliceUint8", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"SliceInt8", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"SliceInt16", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}, + {"SliceInt64", + {3, 2, 3, 1}, + {2, 1, 3, 1}, + {1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6}, + {3, 3, 3, 5, 5, 5}, + {1, 0, 0, 0}, + {2, 1, -1, 1}}}; + + for (auto &case_ : cases) { + auto &name = std::get<0>(case_); + auto &input_shape = std::get<1>(case_); + auto &output_shape = std::get<2>(case_); + auto &input_data = std::get<3>(case_); + auto &expect_data = std::get<4>(case_); + auto &begin = std::get<5>(case_); + auto &size = std::get<6>(case_); + + std::cout << name << std::endl; + auto *param = GetSliceParameter(begin, size); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + false); + param = GetSliceParameter(begin, size); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + true); } - std::vector inputs = {tensor_data}; - std::vector outputs = {output_tensor}; - - MS_LOG(INFO) << " setting SliceParameter "; - auto param = reinterpret_cast(malloc(sizeof(SliceParameter))); - if (param == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - MS_LOG(INFO) << " 
new SliceParameter failed "; - return; - } - for (int i = 0; i < input_shape.size(); i++) { - param->begin_[i] = begin[i]; - param->size_[i] = size[i]; - } - - auto *slice_kernel = - new (std::nothrow) kernel::SliceOpenCLKernel(reinterpret_cast(param), inputs, outputs); - if (slice_kernel == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - MS_LOG(INFO) << " new kernel::slice_kernel failed "; - return; - } - slice_kernel->Init(); - - // to do allocate memory for inputs and outputs - for (auto &input_tensor : inputs) { - input_tensor->MallocData(allocator); - } - - MS_LOG(INFO) << " initialize sub_graph "; - std::vector kernels{slice_kernel}; - auto *sub_graph = new (std::nothrow) kernel::SubGraphOpenCLKernel(inputs, outputs, kernels, kernels, kernels); - if (sub_graph == nullptr) { - for (auto tensor : inputs) { - delete tensor; - } - for (auto tensor : outputs) { - delete tensor; - } - delete param; - delete slice_kernel; - MS_LOG(INFO) << " new kernel::SubGraphOpenCLKernel failed "; - return; - } - sub_graph->Init(); - - MS_LOG(INFO) << " init tensors "; - memcpy(inputs[0]->data_c(), input_data, input_size); +} // namespace mindspore - std::cout << "==================output data================" << std::endl; - sub_graph->Run(); - auto *output_data_gpu = reinterpret_cast(output_tensor->data_c()); - CompareOutputData1(output_data_gpu, correct_data, output_tensor->ElementsNum(), 0.0001); - for (auto tensor : inputs) { - tensor->set_data(nullptr); - delete tensor; - } - for (auto tensor : outputs) { - tensor->set_data(nullptr); - delete tensor; - } - delete sub_graph; -} } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc new file mode 100644 index 0000000000..4b4410841d --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/strided_slice_tests.cc @@ -0,0 +1,317 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "common/common_test.h" +#include "nnacl/strided_slice.h" +#include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" + +namespace mindspore { + +class TestStridedSliceOpenCL : public mindspore::CommonTest {}; + +OpParameter *GetStridedSliceParameter(const std::vector &begins, const std::vector &ends, + const std::vector &strides) { + auto param = static_cast(malloc(sizeof(StridedSliceParameter))); + if (param == nullptr) { + MS_LOG(ERROR) << "create StridedSliceParameter error."; + return nullptr; + } + param->op_parameter_.type_ = schema::PrimitiveType_StridedSlice; + param->num_axes_ = begins.size(); + for (int i = 0; i < begins.size(); ++i) { + param->begins_[i] = begins[i]; + param->ends_[i] = ends[i]; + param->strides_[i] = strides[i]; + } + return reinterpret_cast(param); +} + +TEST_F(TestStridedSliceOpenCL, 1D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33}; + auto *param = GetStridedSliceParameter({3}, {36}, {3}); + TestMain({{{36}, input_data, Tensor::Category::VAR}}, {{11}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 2D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {11, 14}; + auto *param = GetStridedSliceParameter({1, 2}, {3, 8}, {2, 3}); + TestMain({{{4, 9}, input_data, Tensor::Category::VAR}}, {{1, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 3D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {11, 14}; + auto *param = GetStridedSliceParameter({0, 1, 2}, {1, 3, 8}, {1, 2, 3}); + TestMain({{{1, 4, 9}, input_data, Tensor::Category::VAR}}, {{1, 1, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 4D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + + float expect_data0[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + auto *param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, false); + + param = GetStridedSliceParameter({0, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 3, 3}, expect_data0}, param, true); + + float expect_data1[] = {18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 3, 3}, expect_data1}, param, false); + + float expect_data2[] = {27, 28, 29, 30, 31, 32, 33, 34, 35}; + param = GetStridedSliceParameter({1, 1, 0, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 3, 3}, expect_data2}, param, false); + + float expect_data3[] = {33, 34, 35}; + param = GetStridedSliceParameter({1, 1, 2, 0}, {2, 2, 3, 3}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 3}, 
expect_data3}, param, false); + + float expect_data4[] = {34}; + param = GetStridedSliceParameter({1, 1, 2, 1}, {2, 2, 3, 2}, {1, 1, 1, 1}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 1, 1, 1}, expect_data4}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 4D_stride2) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {13, 14, 31, 32}; + auto *param = GetStridedSliceParameter({0, 1, 1, 1}, {1, 4, 3, 3}, {2, 2, 2, 1}); + TestMain({{{1, 4, 3, 3}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, 4D_to_3D) { + float input_data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35}; + float expect_data[] = {18, 20, 21, 23, 27, 29, 30, 32}; + auto *param = GetStridedSliceParameter({1, 0, 0, 0}, {2, 2, 2, 3}, {1, 1, 1, 2}); + TestMain({{{2, 2, 3, 3}, input_data, Tensor::Category::VAR}}, {{2, 2, 2}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, In1D_OutOfRangeBeginNegativeStride) { + float input_data[] = {1, 2, 3, 4}; + float expect_data[] = {4, 3, 2}; + auto *param = GetStridedSliceParameter({5}, {0}, {-1}); + TestMain({{{4}, input_data, Tensor::Category::VAR}}, {{3}, expect_data}, param, false); +} + +TEST_F(TestStridedSliceOpenCL, tflite_cpu) { + std::vector values(32768); + for (int i = 0; i < values.size(); ++i) { + values[i] = i % 1000; + } + std::vector, std::vector, std::vector, std::vector, + std::vector, std::vector, std::vector>> + cases = {{"In1D", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {1}, {3}, {1}}, + {"In1D_Int32End", {32768}, {32768}, values, values, {0}, {32768}, {1}}, + {"In1D_NegativeBegin", {4}, {2}, {1, 2, 3, 4}, {2, 3}, {-3}, {3}, {1}}, + {"In1D_OutOfRangeBegin", {4}, {3}, {1, 2, 3, 4}, {1, 2, 3}, {-5}, {3}, {1}}, + {"In1D_NegativeEnd", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {-2}, {1}}, + {"In1D_OutOfRangeEnd", {4}, {3}, {1, 2, 3, 4}, {2, 3, 4}, {-3}, {5}, {1}}, + {"In1D_NegativeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {3}, {-2}, {-3}, {-1}}, + {"In1D_OutOfRangeBeginNegativeStride", {4}, {1}, {1, 2, 3, 4}, {4}, {5}, {2}, {-1}}, + {"In1D_NegativeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {3, 2}, {2}, {-4}, {-1}}, + {"In1D_OutOfRangeEndNegativeStride", {4}, {2}, {1, 2, 3, 4}, {2, 1}, {-3}, {-5}, {-1}}, + {"In1D_NegStride", {3}, {3}, {1, 2, 3}, {3, 2, 1}, {-1}, {-4}, {-1}}, + {"In1D_EvenLenStride2", {2}, {1}, {1, 2}, {1}, {0}, {2}, {2}}, + {"In1D_OddLenStride2", {3}, {2}, {1, 2, 3}, {1, 3}, {0}, {3}, {2}}, + {"In2D_Identity", {2, 3}, {2, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 0}, {2, 3}, {1, 1}}, + {"In2D", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {4, 5}, {1, 0}, {2, 2}, {1, 1}}, + {"In2D_Stride2", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {1, 3}, {0, 0}, {2, 3}, {2, 2}}, + {"In2D_NegStride", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {2, -1}}, + {"In2D_BeginMask", {2, 3}, {2, 2}, {1, 2, 3, 4, 5, 6}, {1, 2, 4, 5}, {0, 0}, {2, 2}, {1, 1}}, + {"In2D_EndMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {4, 5, 6}, {1, 0}, {2, 3}, {1, 1}}, + {"In2D_NegStrideBeginMask", {2, 3}, {1, 3}, {1, 2, 3, 4, 5, 6}, {6, 5, 4}, {1, -1}, {2, -4}, {1, -1}}, + {"In2D_NegStrideEndMask", {2, 3}, {1, 2}, {1, 2, 3, 4, 5, 6}, {6, 5}, {1, -1}, {2, 0}, {1, -1}}, + {"In3D_Identity", + {2, 3, 2}, + {2, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12}, + {0, 0, 0}, + {2, 3, 2}, + {1, 1, 1}}, + {"In3D_NegStride", + {2, 3, 2}, + {2, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, + {-1, -1, -1}, + {-3, -4, -3}, + {-1, -1, -1}}, + {"In3D_Strided2", + {2, 3, 2}, + {1, 2, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 5}, + {0, 0, 0}, + {2, 3, 2}, + {2, 2, 2}}, + {"In1D_ShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {2}, {1}, {2}, {1}}, + {"In1D_ShrinkAxisMask1_NegativeSlice", {4}, {1}, {0, 1, 2, 3}, {3}, {-1}, {4}, {1}}, + {"In2D_ShrinkAxis3_NegativeSlice", {4, 1}, {1}, {0, 1, 2, 3}, {2}, {-2, -1}, {3, 1}, {1, 1}}, + {"In2D_ShrinkAxis2_BeginEndAxis1_NegativeSlice", + {4, 1}, + {4}, + {0, 1, 2, 3}, + {0, 1, 2, 3}, + {0, -1}, + {4, 1}, + {1, 1}}, + {"In1D_BeginMaskShrinkAxisMask1", {4}, {1}, {1, 2, 3, 4}, {1}, {0}, {1}, {1}}, + {"In2D_ShrinkAxisMask1", {2, 3}, {3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3}, {0, 0}, {1, 3}, {1, 1}}, + {"In2D_ShrinkAxisMask2", {2, 3}, {2}, {1, 2, 3, 4, 5, 6}, {1, 4}, {0, 0}, {2, 1}, {1, 1}}, + {"In2D_ShrinkAxisMask3", {2, 3}, {1}, {1, 2, 3, 4, 5, 6}, {1}, {0, 0}, {1, 1}, {1, 1}}, + {"In3D_IdentityShrinkAxis1", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0, 0, 0}, + {1, 3, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis2", + {2, 3, 2}, + {2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 7, 8}, + {0, 0, 0}, + {2, 1, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis3", + {2, 3, 2}, + {2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2}, + {0, 0, 0}, + {1, 1, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis4", + {2, 3, 2}, + {2, 3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 3, 5, 7, 9, 11}, + {0, 0, 0}, + {2, 3, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis5", + {2, 3, 2}, + {3}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 3, 5}, + {0, 0, 0}, + {1, 3, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis6", + {2, 3, 2}, + {2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 7}, + {0, 0, 0}, + {2, 1, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis7", + {2, 3, 2}, + {1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1}, + {0, 0, 0}, + {1, 1, 1}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis1Uint8", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0, 0, 0}, + {1, 3, 2}, + {1, 1, 1}}, + {"In3D_IdentityShrinkAxis1int8", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0, 0, 0}, + {1, 3, 2}, + {1, 1, 1}}, + {"In5D_Identity", + {2, 2, 2, 2}, + {2, 1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {1, 2, 3, 4, 9, 10, 11, 12}, + {0, 0, 0, 0}, + {2, 1, 2, 2}, + {1, 1, 1, 1}}, + {"In5D_IdentityShrinkAxis1", + {2, 2, 2, 2}, + {1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {1, 2, 3, 4}, + {0, 0, 0, 0}, + {1, 1, 2, 2}, + {1, 1, 1, 1}}, + {"In3D_SmallBegin", + {2, 3, 2}, + {1, 3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0}, + {1}, + {1}}, + {"In3D_SmallBeginWithhrinkAxis1", + {2, 3, 2}, + {3, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {1, 2, 3, 4, 5, 6}, + {0}, + {1}, + {1}}}; + + for (auto &case_ : cases) { + auto &name = std::get<0>(case_); + auto &input_shape = std::get<1>(case_); + auto &output_shape = std::get<2>(case_); + auto &input_data = std::get<3>(case_); + auto &expect_data = std::get<4>(case_); + auto &begin = std::get<5>(case_); + auto &end = std::get<6>(case_); + auto &stride = std::get<7>(case_); + + std::cout << name << std::endl; + auto 
*param = GetStridedSliceParameter(begin, end, stride); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + false); + param = GetStridedSliceParameter(begin, end, stride); + TestMain({{input_shape, input_data.data(), Tensor::Category::VAR}}, {output_shape, expect_data.data()}, param, + true); + } +} + +TEST_F(TestStridedSliceOpenCL, tflite_opencl) { + float input_data[] = {0.1f, 0.2f, 0.3f, 0.4, 1.1f, 1.2f, 1.3f, 1.4, 10.1f, 10.2f, 10.3f, 10.4, + 11.1f, 11.2f, 11.3f, 11.4, 20.1f, 20.2f, 20.3f, 20.4, 21.1f, 21.2f, 21.3f, 21.4}; + float expect_data[] = {10.2, 10.4, 20.2, 20.4}; + auto *param = GetStridedSliceParameter({0, 1, 0, 1}, {1, 3, 2, 4}, {1, 1, 2, 2}); + TestMain({{{1, 3, 2, 4}, input_data, Tensor::Category::VAR}}, {{1, 2, 1, 2}, expect_data}, param, false); +} + +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc index 1bdc24028e..1aa68f4363 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.cc @@ -15,11 +15,17 @@ */ #include -#include "src/common/log_adapter.h" -#include "mindspore/lite/src/common/file_utils.h" +#include "common/common_test.h" +#include "src/kernel_registry.h" #include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" #include "mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h" +using mindspore::kernel::LiteKernel; +using mindspore::kernel::SubGraphOpenCLKernel; +using mindspore::lite::KernelRegistry; +using mindspore::lite::Tensor; +using mindspore::schema::Format::Format_NHWC; + namespace mindspore { void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) { @@ -35,4 +41,80 @@ void LoadTestData(void *dst, size_t dst_size, const std::string &file_path) { } } +void TestMain(const std::vector, float *, Tensor::Category>> &input_infos, + std::tuple, float *> output_info, OpParameter *op_parameter, bool fp16_enable, + float atol, bool print_output) { + MS_LOG(DEBUG) << "initialize OpenCLRuntime and OpenCLAllocator"; + auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); + auto ocl_runtime = runtime_wrapper.GetInstance(); + EXPECT_TRUE(ocl_runtime->Init() == RET_OK); + ocl_runtime->SetFp16Enable(fp16_enable); + auto allocator = ocl_runtime->GetAllocator(); + + MS_LOG(DEBUG) << "create Tensors & init weight data"; + std::vector tensors; + std::vector kernel_inputs; + std::vector subgraph_inputs; + std::map subgraph_inputs_data; + for (auto input_info : input_infos) { + const std::vector &shape = std::get<0>(input_info); + auto *input_data = std::get<1>(input_info); + const Tensor::Category category = std::get<2>(input_info); + tensors.emplace_back(kNumberTypeFloat32, shape, Format_NHWC, category); + auto *new_tensor = &tensors.back(); + kernel_inputs.push_back(new_tensor); + if (category != Tensor::Category::VAR) { + memcpy(new_tensor->MutableData(), input_data, new_tensor->Size()); + } else { + subgraph_inputs.push_back(new_tensor); + subgraph_inputs_data[new_tensor] = input_data; + } + } + const std::vector &output_shape = std::get<0>(output_info); + float *expect_data = std::get<1>(output_info); + auto output = Tensor(kNumberTypeFloat32, output_shape, Format_NHWC, Tensor::Category::VAR); + + MS_LOG(DEBUG) << "create OpenCL Kernel"; + auto primitive_type = static_cast(op_parameter->type_); + kernel::KernelKey key{kernel::kGPU, 
kernel_inputs.front()->data_type(), primitive_type}; + auto creator = KernelRegistry::GetInstance()->GetCreator(key); + if (creator == nullptr) { + std::cerr << "get kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; + free(op_parameter); + FAIL(); + } + auto *kernel = creator(kernel_inputs, {&output}, op_parameter, nullptr, key, nullptr); + if (kernel == nullptr) { + std::cerr << "call kernel registry function error: " << schema::EnumNamePrimitiveType(primitive_type) << std::endl; + free(op_parameter); + FAIL(); + } + + MS_LOG(DEBUG) << "create SubGraph & init input data"; + std::vector kernels{kernel}; + auto sub_graph = new (std::nothrow) SubGraphOpenCLKernel(subgraph_inputs, {&output}, kernels, kernels, kernels); + if (sub_graph == nullptr) { + return; + } + for (auto input : subgraph_inputs) { + EXPECT_TRUE(input->MallocData(allocator) == RET_OK); + } + EXPECT_TRUE(sub_graph->Init() == RET_OK); + for (auto input : subgraph_inputs) { + memcpy(input->data_c(), subgraph_inputs_data[input], input->Size()); + } + + MS_LOG(DEBUG) << "run SubGraph & compare result"; + EXPECT_TRUE(sub_graph->Run() == RET_OK); + if (print_output) { + for (int i = 0; i < output.ElementsNum(); ++i) { + printf("%d: expect=%.3f output=%.3f\n", i, expect_data[i], reinterpret_cast(output.data_c())[i]); + } + } + CommonTest::CompareOutputData(reinterpret_cast(output.data_c()), expect_data, output.ElementsNum(), atol); + + MS_LOG(DEBUG) << "release resources"; + delete sub_graph; +} + } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h index dd4fb08958..dadcbd00a9 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h +++ b/mindspore/lite/test/ut/src/runtime/kernel/opencl/utils_tests.h @@ -14,16 +14,18 @@ * limitations under the License. */ +#ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ +#define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ + #include #include -#include "tests/ut/cpp/common/common_test.h" -#include "src/common/log_adapter.h" +#include +#include +#include +#include "mindspore/lite/src/tensor.h" #include "mindspore/lite/src/common/file_utils.h" -#include "mindspore/lite/src/runtime/opencl/opencl_runtime.h" -#include "mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.h" -#ifndef TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ -#define TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_ +using mindspore::lite::Tensor; namespace mindspore { @@ -63,6 +65,10 @@ void CompareOutput(lite::Tensor *output_tensor, const std::string &file_path, T CompareOutput(output_tensor->data_c(), expect_data, output_tensor->ElementsNum(), atol, rtol); } +void TestMain(const std::vector, float *, Tensor::Category>> &input_infos, + std::tuple, float *> output_info, OpParameter *op_parameter, bool fp16_enable = false, + float atol = 10e-9, bool print_output = false); + } // namespace mindspore #endif // TESTS_UT_OPENCL_KERNEL_TESTS_UTILS_H_