diff --git a/mindspore/lite/nnacl/base/reshape_base.h b/mindspore/lite/nnacl/base/reshape_base.h
deleted file mode 100644
index d2b12302c8..0000000000
--- a/mindspore/lite/nnacl/base/reshape_base.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_
-#define MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_
-
-#include <string.h>
-#include "nnacl/op_base.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-inline void Reshape(const void *input_ptr, void *output_ptr, size_t data_size) {
-  memcpy(output_ptr, input_ptr, data_size);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINDSPORE_LITE_NNACL_RESHAHPE_BASE_H_
diff --git a/mindspore/lite/nnacl/base/squeeze_base.h b/mindspore/lite/nnacl/base/squeeze_base.h
deleted file mode 100644
index 5f3ea2da4e..0000000000
--- a/mindspore/lite/nnacl/base/squeeze_base.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
-#define MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
-
-#include "nnacl/errorcode.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-static inline int DoSqueeze(const void *input_ptr, void *output_ptr, size_t data_size) {
-  if (input_ptr == NULL || output_ptr == NULL) {
-    return NNACL_ERR;
-  }
-  (void)memcpy(output_ptr, input_ptr, data_size);
-  return NNACL_OK;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINDSPORE_LITE_NNACL_SQUEEZE_BASE_H_
diff --git a/mindspore/lite/nnacl/op_base.h b/mindspore/lite/nnacl/op_base.h
index f9cb3d6c44..82105f8e97 100644
--- a/mindspore/lite/nnacl/op_base.h
+++ b/mindspore/lite/nnacl/op_base.h
@@ -64,6 +64,7 @@
 
 typedef enum LiteDataType {
   kDataTypeFloat,
+  kDataTypeFloat16,
   kDataTypeInt,
   kDataTypeInt8,
   KDataTypeBool,
diff --git a/mindspore/lite/nnacl/strided_slice.c b/mindspore/lite/nnacl/strided_slice.c
index f227a082f8..a7a14022bc 100644
--- a/mindspore/lite/nnacl/strided_slice.c
+++ b/mindspore/lite/nnacl/strided_slice.c
@@ -108,6 +108,10 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p
                 *((int8_t *)out_data + out_offset) = *((int8_t *)in_data + in_offset);
               } else if (param->data_type == kDataTypeInt) {
                 *((int32_t *)out_data + out_offset) = *((int32_t *)in_data + in_offset);
+#ifdef ENABLE_ARM64
+              } else if (param->data_type == kDataTypeFloat16) {
+                *((float16_t *)out_data + out_offset) = *((float16_t *)in_data + in_offset);
+#endif
               } else {
                 return NNACL_ERR;
               }
@@ -120,3 +124,15 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p
   }
   return NNACL_OK;
 }
+
+void FastStride(const uint8_t *input, uint8_t *output, int split_len, int stride, size_t outer, size_t inner_size,
+                size_t in_offset) {
+  // Packs outer * split_len chunks into output; the signed step lets a negative stride walk backwards safely.
+  const int64_t step = (int64_t)inner_size * stride;
+  for (size_t i = 0; i < outer; ++i) {
+    const uint8_t *src = input + i * in_offset;
+    for (int j = 0; j < split_len; ++j, src += step, output += inner_size) {
+      memcpy(output, src, inner_size);
+    }
+  }
+}
diff --git a/mindspore/lite/nnacl/strided_slice.h b/mindspore/lite/nnacl/strided_slice.h
index 9d3d353990..2e0ff78a4f 100644
--- a/mindspore/lite/nnacl/strided_slice.h
+++ b/mindspore/lite/nnacl/strided_slice.h
@@ -39,6 +39,9 @@ typedef struct StridedSliceParameter {
 extern "C" {
 #endif
 int DoStridedSlice(const void *inputs, void *output, StridedSliceParameter *param);
+// Gathers outer x split_len chunks of inner_size bytes (inner_size * stride apart in input) into packed output.
+void FastStride(const uint8_t *input, uint8_t *output, int split_len, int stride, size_t outer, size_t inner_size,
+                size_t in_offset);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/src/ops/populate/expand_dims_populate.cc b/mindspore/lite/src/ops/populate/expand_dims_populate.cc
index 63901a9993..bb62cc477c 100644
--- a/mindspore/lite/src/ops/populate/expand_dims_populate.cc
+++ b/mindspore/lite/src/ops/populate/expand_dims_populate.cc
@@ -26,6 +26,7 @@ OpParameter *PopulateExpandDimsParameter(const mindspore::lite::PrimitiveC *prim
     MS_LOG(ERROR) << "malloc ExpandDimsParameter failed.";
     return nullptr;
   }
+  expand_dims_param->type_ = primitive->Type();
   memset(expand_dims_param, 0, sizeof(OpParameter));
   return reinterpret_cast<OpParameter *>(expand_dims_param);
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
new file mode 100644
index 0000000000..9da7d0e779
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/base/reshape_base.h"
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "include/errorcode.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Reshape;
+
+namespace mindspore::kernel {
+int ReshapeBaseCPUKernel::Init() { return this->ReSize(); }  // Init only has to compute the per-task share
+
+int ReshapeBaseCPUKernel::ReSize() {
+  // Per-task share in bytes, rounded up; kept in size_t so the tensor byte count is never narrowed to int.
+  const size_t in_data_size = in_tensors_.front()->Size();
+  cal_max_num_per_thread_ = UP_DIV(in_data_size, static_cast<size_t>(context_->thread_num_));
+  return RET_OK;
+}
+
+int ReshapeBaseCPUKernel::RunImpl(int task_id) {
+  const size_t total_size = in_tensors_.front()->Size();
+  const size_t begin = task_id * cal_max_num_per_thread_;
+  if (begin < total_size) {  // UP_DIV rounding can leave trailing tasks empty; never let the subtraction wrap
+    const size_t left = total_size - begin;
+    memcpy(output_ptr_ + begin, input_ptr_ + begin, left < cal_max_num_per_thread_ ? left : cal_max_num_per_thread_);
+  }
+  return RET_OK;
+}
+
+int ReshapeRun(void *cdata, int task_id) {
+  // ParallelLaunch callback: cdata carries the ReshapeBaseCPUKernel that copies the slice for task_id.
+  const int status = reinterpret_cast<ReshapeBaseCPUKernel *>(cdata)->RunImpl(task_id);
+  if (status == RET_OK) {
+    return RET_OK;
+  }
+  MS_LOG(ERROR) << "ReshapeRun error task_id[" << task_id << "] error_code[" << status << "]";
+  return status;
+}
+
+int ReshapeBaseCPUKernel::Run() {
+  // Cache the raw byte buffers for RunImpl, then launch thread_num_ ReshapeRun tasks.
+  input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
+  output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
+  const auto status = ParallelLaunch(this->context_->thread_pool_, ReshapeRun, this, context_->thread_num_);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Reshape run error error_code[" << status << "]";
+  }
+  return status;
+}
+
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reshape, LiteKernelCreator<ReshapeBaseCPUKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
similarity index 62%
rename from mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h
rename to mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
index 58a93984ba..064e11dec1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
@@ -13,32 +13,33 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_
 
 #include <vector>
-#include "nnacl/fp16/cast_fp16.h"
-#include "nnacl/base/reshape_base.h"
 #include "src/lite_kernel.h"
 #include "include/context.h"
-#include "src/runtime/kernel/arm/fp32/reshape_fp32.h"
 
 using mindspore::lite::InnerContext;
-
 namespace mindspore::kernel {
-class ReshapeFp16CPUKernel : public ReshapeCPUKernel {
+class ReshapeBaseCPUKernel : public LiteKernel {
  public:
-  ReshapeFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+  ReshapeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
                        const mindspore::lite::PrimitiveC *primitive)
-      : ReshapeCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ReshapeFp16CPUKernel() = default;
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
+  ~ReshapeBaseCPUKernel() override = default;
 
+  int Init() override;
+  int ReSize() override;
   int Run() override;
+  int RunImpl(int task_id);
 
  private:
+  size_t cal_max_num_per_thread_ = 0;
+  uint8_t *input_ptr_ = nullptr;
+  uint8_t *output_ptr_ = nullptr;
 };
 }  // namespace mindspore::kernel
 
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_RESHAPE_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_RESHAPE_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc
similarity index 61%
rename from mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc
rename to mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc
index c5c3da59ba..2be895eddd 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.cc
@@ -13,34 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#include "src/runtime/kernel/arm/fp32/squeeze_fp32.h"
-#include "schema/model_generated.h"
+#include "src/runtime/kernel/arm/base/squeeze_base.h"
 #include "src/kernel_registry.h"
-#include "include/errorcode.h"
+#include "schema/model_generated.h"
 
 using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Squeeze;
-
 namespace mindspore::kernel {
-int SqueezeCPUKernel::Init() { return RET_OK; }
-
-int SqueezeCPUKernel::ReSize() { return RET_OK; }
-
-int SqueezeCPUKernel::Run() {
-  size_t data_size = in_tensors_.front()->Size();
-  int ret = DoSqueeze(in_tensors_.front()->data_c(), out_tensors_.front()->data_c(), data_size);
-
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Do squeeze fail!ret: " << ret;
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeCPUKernel>)
-REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeCPUKernel>)
-REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeBool, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeBaseCPUKernel>)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h
similarity index 50%
rename from mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h
rename to mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h
index 400dfe1f3e..e9a3a1dd1a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/squeeze_base.h
@@ -13,30 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_
 
 #include <vector>
-#include "src/lite_kernel.h"
-#include "include/context.h"
-#include "nnacl/base/reshape_base.h"
+#include "src/runtime/kernel/arm/base/reshape_base.h"
 
 using mindspore::lite::InnerContext;
-
 namespace mindspore::kernel {
-class ReshapeCPUKernel : public LiteKernel {
+class SqueezeBaseCPUKernel : public ReshapeBaseCPUKernel {
  public:
-  ReshapeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                   const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
-                   const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ReshapeCPUKernel() = default;
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
+  SqueezeBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                       const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+                       const mindspore::lite::PrimitiveC *primitive)
+      : ReshapeBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+  ~SqueezeBaseCPUKernel() override = default;
 };
 }  // namespace mindspore::kernel
 
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_RESHAPE_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SQUEEZE_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
index a7ebac6b46..3b327a0e5a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
@@ -27,7 +27,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Stack;
 
 namespace mindspore::kernel {
-static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
+static inline int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
   int copy_num = 1;
   if (axis > 0) {
     for (int j = n_dim - 1; j > axis - 1; j--) {
@@ -41,12 +41,12 @@ static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
   return copy_num;
 }
 
-static size_t GetOutterSize(const std::vector<int> &in_shape, int axis) {
-  size_t outter_size = 1;
+static inline size_t GetOuterSize(const std::vector<int> &in_shape, int axis) {
+  size_t outer_size = 1;
   for (int i = 0; i < axis; ++i) {
-    outter_size *= in_shape[i];
+    outer_size *= in_shape[i];
   }
-  return outter_size;
+  return outer_size;
 }
 
 int StackBaseCPUKernel::ReSize() {
@@ -59,14 +59,13 @@ int StackBaseCPUKernel::ReSize() {
   } else {
     MS_ASSERT(input_nums > 1);
     copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
-    outter_size_ = GetOutterSize(input0_shape, axis_);
+    outer_size_ = GetOuterSize(input0_shape, axis_);
   }
   return RET_OK;
 }
 
 int StackBaseCPUKernel::Init() {
-  auto input0_tensor = in_tensors_.front();
-  data_type_size_ = input0_tensor->Size() / input0_tensor->ElementsNum();
+  data_type_size_ = sizeof(float);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -74,13 +73,21 @@ int StackBaseCPUKernel::Init() {
 }
 
 int StackBaseCPUKernel::Run() {
+  // malloc temporary memory to store all the inputs
   size_t inputs_num = in_tensors_.size();
   char **all_inputs = static_cast<char **>(context_->allocator->Malloc(inputs_num * sizeof(char *)));
+  if (all_inputs == nullptr) {
+    MS_LOG(ERROR) << "malloc all_inputs failed.";
+    return RET_ERROR;
+  }
   for (size_t j = 0; j < inputs_num; ++j) {
     all_inputs[j] = reinterpret_cast<char *>(in_tensors_.at(j)->data_c());
   }
+  // run stack
   auto output_data = reinterpret_cast<char *>(out_tensors_.at(0)->data_c());
-  Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outter_size_);
+  Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outer_size_);
+
+  // free temporary variable all_inputs
   context_->allocator->Free(all_inputs);
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
index d78ecf76a9..4ea68271c2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
@@ -38,7 +38,7 @@ class StackBaseCPUKernel : public LiteKernel {
   int axis_ = 0;
   size_t data_type_size_ = 0;
   size_t copy_size_ = 0;
-  size_t outter_size_ = 1;
+  size_t outer_size_ = 1;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
index 77c896d405..31479ec782 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.cc
@@ -33,11 +33,37 @@ int StridedSliceCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
   }
-
   return ReSize();
 }
 
+void StridedSliceCPUKernel::InitFastRunParam() {
+  // Derives the fast-path geometry around split_axis_: outer_ (product of the dims before it), inner_size_
+  // (bytes covered by the dims after it) and how the work is divided among context_->thread_num_ tasks.
+  auto in_shape = in_tensors_.front()->shape();
+  auto out_shape = out_tensors_.front()->shape();
+  // ReSize may run more than once, so rebuild outer_ from 1 instead of multiplying into the previous value.
+  outer_ = 1;
+  for (int i = 0; i < split_axis_; ++i) {
+    outer_ *= in_shape[i];
+  }
+  int inner = 1;
+  for (size_t i = split_axis_ + 1; i < in_shape.size(); i++) {
+    inner *= in_shape[i];
+  }
+  inner_size_ = in_tensors_.front()->Size() / in_tensors_.front()->ElementsNum() * inner;
+
+  // Pick exactly one launch strategy; both flags are reassigned so a flag from an earlier shape cannot linger.
+  parallel_on_split_axis_ = (outer_ == 1);
+  parallel_on_outer_ = !parallel_on_split_axis_;
+  const int work_items = parallel_on_split_axis_ ? out_shape[split_axis_] : outer_;
+  cal_num_per_thread_ = UP_DIV(work_items, context_->thread_num_);
+}
+
 int StridedSliceCPUKernel::ReSize() {
+  fast_run_ = MatchFastPattern();
+  if (fast_run_) {
+    InitFastRunParam();
+  }
   if (op_parameter_ != nullptr) {
     free(op_parameter_);
     op_parameter_ = nullptr;
@@ -51,7 +77,82 @@ int StridedSliceCPUKernel::ReSize() {
   return RET_OK;
 }
 
-int StridedSliceCPUKernel::Run() {
+bool StridedSliceCPUKernel::MatchFastPattern() {
+  // The fast path applies when input and output have the same rank and differ in exactly one
+  // dimension; that dimension is stored in split_axis_. On a mismatch split_axis_ is left untouched.
+  // Example 1: input shape [1, 80, 46, 40] -> output shape [1, 80, 20, 40]
+  // Example 2: input shape [1, 46, 40]     -> output shape [1, 20, 40]
+  const auto src_shape = in_tensors_.front()->shape();
+  const auto dst_shape = out_tensors_.front()->shape();
+  if (src_shape.size() != dst_shape.size()) {
+    return false;
+  }
+
+  // Count the differing dimensions, remembering where the difference is.
+  int candidate_axis = -1;
+  size_t differing_dims = 0;
+  for (size_t dim = 0; dim < src_shape.size(); ++dim) {
+    if (src_shape[dim] != dst_shape[dim]) {
+      candidate_axis = static_cast<int>(dim);
+      ++differing_dims;
+    }
+  }
+  if (differing_dims != 1) {
+    return false;
+  }
+  split_axis_ = candidate_axis;
+  return true;
+}
+
+int StridedSliceCPUKernel::FastRunImpl(int task_id) {
+  auto in_shape = in_tensors_.front()->shape();
+  auto out_shape = out_tensors_.front()->shape();
+  int begin_index = param_->begins_[split_axis_];
+  int caled_num = task_id * cal_num_per_thread_;  // work items already assigned to lower task ids
+  if (parallel_on_outer_) {
+    // UP_DIV rounding can leave trailing tasks idle; a negative count would become a huge size_t in FastStride.
+    int cur_outer = outer_ - caled_num;
+    if (cur_outer <= 0) {
+      return RET_OK;
+    }
+    cur_outer = cur_outer > cal_num_per_thread_ ? cal_num_per_thread_ : cur_outer;
+    uint8_t *cur_in_ptr = input_ptr_ + (caled_num * in_shape[split_axis_] + begin_index) * inner_size_;
+    uint8_t *cur_out_ptr = output_ptr_ + caled_num * out_shape[split_axis_] * inner_size_;
+    FastStride(cur_in_ptr, cur_out_ptr, out_shape[split_axis_], param_->strides_[split_axis_], cur_outer, inner_size_,
+               in_shape[split_axis_] * inner_size_);
+  } else {
+    MS_ASSERT(parallel_on_split_axis_);
+    uint8_t *cur_in_ptr = input_ptr_ + (caled_num * param_->strides_[split_axis_] + begin_index) * inner_size_;
+    uint8_t *cur_out_ptr = output_ptr_ + caled_num * inner_size_;
+    int cal_axis_num = out_shape[split_axis_] - caled_num;
+    cal_axis_num = cal_axis_num > cal_num_per_thread_ ? cal_num_per_thread_ : cal_axis_num;
+    FastStride(cur_in_ptr, cur_out_ptr, cal_axis_num, param_->strides_[split_axis_], 1, inner_size_, 0);
+  }
+  return RET_OK;
+}
+
+int StrideRun(void *cdata, int task_id) {
+  // ParallelLaunch callback: cdata carries the StridedSliceCPUKernel that processes slice task_id.
+  const int status = reinterpret_cast<StridedSliceCPUKernel *>(cdata)->FastRunImpl(task_id);
+  if (status == RET_OK) {
+    return RET_OK;
+  }
+  MS_LOG(ERROR) << "StrideRun error task_id[" << task_id << "] error_code[" << status << "]";
+  return status;
+}
+
+int StridedSliceCPUKernel::FastRun() {
+  // Cache the raw byte buffers for FastRunImpl, then launch thread_num_ StrideRun tasks.
+  input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.front()->data_c());
+  output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.front()->data_c());
+  const auto status = ParallelLaunch(this->context_->thread_pool_, StrideRun, this, context_->thread_num_);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Stride run error error_code[" << status << "]";
+  }
+  return status;
+}
+
+int StridedSliceCPUKernel::NormalRun() {
   auto input = in_tensors_.at(0);
   MS_ASSERT(input);
   switch (input->data_type()) {
@@ -61,6 +162,9 @@ int StridedSliceCPUKernel::Run() {
     case kNumberTypeFloat32:
       param_->data_type = kDataTypeFloat;
       break;
+    case kNumberTypeFloat16:
+      param_->data_type = kDataTypeFloat16;
+      break;
     case kNumberTypeInt32:
       param_->data_type = kDataTypeInt;
       break;
@@ -78,7 +182,15 @@ int StridedSliceCPUKernel::Run() {
   return RET_OK;
 }
 
+int StridedSliceCPUKernel::Run() {
+  // fast_run_ is computed by MatchFastPattern() during ReSize(). When it holds, the slice is done by
+  // the multi-threaded FastRun(); otherwise NormalRun() handles the request.
+  const bool use_fast_path = fast_run_;
+  return use_fast_path ? FastRun() : NormalRun();
+}
+
 REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_StridedSlice, LiteKernelCreator<StridedSliceCPUKernel>)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
index 0de0becec2..2e9f228a0f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/strided_slice.h
@@ -35,9 +35,23 @@ class StridedSliceCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
+  bool MatchFastPattern();
+  void InitFastRunParam();
+  int NormalRun();
+  int FastRun();
+  int FastRunImpl(int task_id);
 
  private:
   StridedSliceParameter *param_;
+  uint8_t *input_ptr_ = nullptr;
+  uint8_t *output_ptr_ = nullptr;
+  int split_axis_{-1};
+  int outer_{1};
+  int cal_num_per_thread_{1};
+  size_t inner_size_{0};
+  bool fast_run_{false};
+  bool parallel_on_split_axis_{false};
+  bool parallel_on_outer_{false};
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc
deleted file mode 100644
index 42d280768d..0000000000
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/reshape_fp16.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "src/runtime/kernel/arm/fp16/reshape_fp16.h"
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
-#include "include/errorcode.h"
-
-using mindspore::kernel::KERNEL_ARCH::kCPU;
-using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_Reshape;
-
-namespace mindspore::kernel {
-
-int ReshapeFp16CPUKernel::Run() {
-  auto in_tensor = in_tensors_.at(kInputIndex);
-  auto out_tensor = out_tensors_.at(kOutputIndex);
-
-  float16_t *input_ptr = reinterpret_cast<float16_t *>(in_tensor->data_c());
-  float16_t *output_ptr = reinterpret_cast<float16_t *>(out_tensor->data_c());
-
-  Reshape(input_ptr, output_ptr, out_tensor->Size());
-
-  return RET_OK;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Reshape, LiteKernelCreator<ReshapeFp16CPUKernel>)
-}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
index 9864b33397..068478e51c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
@@ -82,7 +82,7 @@ int StackFp16CPUKernel::Run() {
     FreeBuffer();
     return ret;
   }
-  Stack(buffers_.data(), reinterpret_cast<char *>(out_buffer_), in_tensors_.size(), copy_size_, outter_size_);
+  Stack(buffers_.data(), reinterpret_cast<char *>(out_buffer_), in_tensors_.size(), copy_size_, outer_size_);
   // if output tensor is fp32, we need to transform
   if (malloc_out_) {
     auto out_tensor = out_tensors_.at(0);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc
index a77031ef2c..93fac46927 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/expandDims_fp32.cc
@@ -82,8 +82,8 @@ int ExpandDimsRun(void *cdata, int task_id) {
 }
 
 int ExpandDimsCPUKernel::Run() {
-  in_ptr_ = in_tensors_.at(0)->MutableData();
-  out_ptr_ = out_tensors_.at(0)->MutableData();
+  in_ptr_ = in_tensors_.at(0)->data_c();
+  out_ptr_ = out_tensors_.at(0)->data_c();
   auto ret = ParallelLaunch(this->context_->thread_pool_, ExpandDimsRun, this, thread_sz_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ExpandDimsRun error error_code[" << ret << "]";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc
deleted file mode 100644
index d138775777..0000000000
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reshape_fp32.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "src/runtime/kernel/arm/fp32/reshape_fp32.h"
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
-#include "include/errorcode.h"
-
-using mindspore::kernel::KERNEL_ARCH::kCPU;
-using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_Reshape;
-
-namespace mindspore::kernel {
-int ReshapeCPUKernel::Init() { return RET_OK; }
-
-int ReshapeCPUKernel::ReSize() { return RET_OK; }
-
-int ReshapeCPUKernel::Run() {
-  auto input_ptr = in_tensors_.at(kInputIndex)->data_c();
-  auto output_ptr = out_tensors_.at(kOutputIndex)->data_c();
-  size_t data_size = in_tensors_.at(kInputIndex)->Size();
-  MS_ASSERT(input_ptr);
-  MS_ASSERT(output_ptr);
-  Reshape(input_ptr, output_ptr, data_size);
-  return RET_OK;
-}
-
-REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeCPUKernel>)
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Reshape, LiteKernelCreator<ReshapeCPUKernel>)
-}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc
index f0cf0e0272..31e0bc84b9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/split_fp32.cc
@@ -75,44 +75,18 @@ int SplitRun(void *cdata, int task_id) {
 
 int SplitCPUKernel::Run() {
   auto in_tensor = in_tensors_.front();
-  input_ptr_ = reinterpret_cast<float *>(in_tensor->MutableData());
+  input_ptr_ = reinterpret_cast<float *>(in_tensor->data_c());
   for (int i = 0; i < param->num_split_; i++) {
-    output_ptr_.at(i) = reinterpret_cast<float *>(out_tensors_.at(i)->MutableData());
+    output_ptr_.at(i) = reinterpret_cast<float *>(out_tensors_.at(i)->data_c());
   }
   auto ret = ParallelLaunch(this->context_->thread_pool_, SplitRun, this, thread_n_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;
   }
-
   return RET_OK;
 }
 
-kernel::LiteKernel *CpuSplitInt32KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                               const InnerContext *ctx, const kernel::KernelKey &desc,
-                                               const mindspore::lite::PrimitiveC *primitive) {
-  if (opParameter == nullptr) {
-    MS_LOG(ERROR) << "Input opParameter is nullptr!";
-    return nullptr;
-  }
-  MS_ASSERT(desc.type == schema::PrimitiveType_Split);
-  auto *kernel = new (std::nothrow) SplitCPUKernel(opParameter, inputs, outputs, ctx, primitive);
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "new SplitCPUKernel fail!";
-    free(opParameter);
-    return nullptr;
-  }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    delete kernel;
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
-    return nullptr;
-  }
-  return kernel;
-}
-
 REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Split, LiteKernelCreator<SplitCPUKernel>)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Split, LiteKernelCreator<SplitCPUKernel>)
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h
deleted file mode 100644
index 5940d5c6e7..0000000000
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/squeeze_fp32.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_
-
-#include <vector>
-#include "src/lite_kernel.h"
-#include "nnacl/base/squeeze_base.h"
-
-namespace mindspore::kernel {
-
-class SqueezeCPUKernel : public LiteKernel {
- public:
-  explicit SqueezeCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                            const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                            const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~SqueezeCPUKernel() override = default;
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
-};
-}  // namespace mindspore::kernel
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SQUEEZE_H_