!11637 unsqueeze slice space-to-depth

From: @ling_qiao_min Reviewed-by: @zhang_xue_tong,@hangangqiang Signed-off-by: @zhang_xue_tong
4 years ago · a7016ea735
parent 74e4cc1876 336ba49ef2
commit a7016ea735
27 changed files with 92 additions and 301 deletions
--- a/mindspore/lite/nnacl/base/slice_base.c
+++ b/mindspore/lite/nnacl/base/slice_base.c
@ -13,11 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-
-#include "nnacl/fp32/slice_fp32.h"
+#include "nnacl/base/slice_base.h"
 #include <string.h>
-#include "nnacl/op_base.h"
-
 void PadSliceParameterTo4D(SliceParameter *param) {
  int32_t begin[DIMENSION_4D];
  int32_t end[DIMENSION_4D];
@ -46,7 +43,10 @@ void PadSliceParameterTo4D(SliceParameter *param) {
  param->param_length_ = DIMENSION_4D;
 }

-void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id) {
+void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size) {
+  int8_t *int8_in = (int8_t *)input;
+  int8_t *int8_out = (int8_t *)output;
+
  int32_t out_dim1 = param->size_[1];
  int32_t out_dim2 = param->size_[2];
  int32_t out_dim3 = param->size_[3];
@ -55,7 +55,7 @@ void DoSlice(const float *input, float *output, const SliceParameter *param, int
  size_t out_stride0 = out_stride1 * out_dim1;
  size_t count_per_thread = UP_DIV(out_dim1, param->op_parameter_.thread_num_);
  size_t thread_stride = thread_id * count_per_thread;
-  size_t copy_size = param->size_[3] * sizeof(float);
+  size_t copy_size = param->size_[3] * data_size;
  size_t in_stride2 = param->shape_[3];
  size_t in_stride1 = param->shape_[2] * in_stride2;
  size_t in_stride0 = param->shape_[1] * in_stride1;
@ -72,14 +72,17 @@ void DoSlice(const float *input, float *output, const SliceParameter *param, int
      for (int l = 0; l < out_dim2; ++l) {
        size_t out_offset = out_offset1 + l * out_stride2;
        size_t in_offset = in_offset1 + (l + param->begin_[2]) * in_stride2;
-        memcpy(output + out_offset, input + in_offset, copy_size);
+        memcpy(int8_out + out_offset * data_size, int8_in + in_offset * data_size, copy_size);
      }
    }
  }
 }

-void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param) {
-  size_t copy_size = param->size_[3] * sizeof(float);
+void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size) {
+  int8_t *int8_in = (int8_t *)input;
+  int8_t *int8_out = (int8_t *)output;
+
+  size_t copy_size = param->size_[3] * data_size;
  size_t in_stride2 = param->shape_[3];
  size_t in_stride1 = param->shape_[2] * in_stride2;
  size_t in_stride0 = param->shape_[1] * in_stride1;
@ -90,7 +93,7 @@ void DoSliceNoParallel(const float *input, float *output, const SliceParameter *
      size_t in_offset1 = dim1 * in_stride1 + in_offset0;
      for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) {
        size_t in_offset = in_offset1 + dim2 * in_stride2;
-        memcpy(output + out_offset, input + in_offset, copy_size);
+        memcpy(int8_out + out_offset * data_size, int8_in + in_offset * data_size, copy_size);
        out_offset += param->size_[3];
      }
    }
--- a/mindspore/lite/nnacl/base/slice_base.h
+++ b/mindspore/lite/nnacl/base/slice_base.h
@ -13,20 +13,22 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#ifndef MINDSPORE_LITE_NNACL_FP32_SLICE_H_
-#define MINDSPORE_LITE_NNACL_FP32_SLICE_H_
+#ifndef MINDSPORE_LITE_NNACL_BASE_SLICE_BASE_H_
+#define MINDSPORE_LITE_NNACL_BASE_SLICE_BASE_H_

 #include "nnacl/op_base.h"
+#include "nnacl/errorcode.h"
 #include "nnacl/slice_parameter.h"

 #ifdef __cplusplus
 extern "C" {
 #endif
 void PadSliceParameterTo4D(SliceParameter *param);
-void DoSlice(const float *input, float *output, const SliceParameter *param, int thread_id);
-void DoSliceNoParallel(const float *input, float *output, const SliceParameter *param);
+
+void DoSlice(const void *input, void *output, SliceParameter *param, int thread_id, int data_size);
+void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, int data_size);
 #ifdef __cplusplus
 }
 #endif

-#endif  // MINDSPORE_LITE_NNACL_FP32_SLICE_H_
+#endif  // MINDSPORE_LITE_NNACL_BASE_SLICE_BASE_H_
--- a/mindspore/lite/nnacl/base/space_to_depth_base.c
+++ b/mindspore/lite/nnacl/base/space_to_depth_base.c
@ -13,13 +13,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#include "nnacl/fp32/space_to_depth_fp32.h"
+
+#include "nnacl/base/space_to_depth_base.h"
 #include "nnacl/common_func.h"
 #include "nnacl/errorcode.h"
-#include "nnacl/op_base.h"

-int SpaceToDepthForNHWC(const float *input, float *output, const int *in_shape, const int *out_shape, int shape_size,
-                        int block_size, int h_start, int h_end) {
+int SpaceToDepthForNHWC(const void *input, void *output, const int *in_shape, const int *out_shape, int shape_size,
+                        int block_size, int h_start, int h_end, int data_size) {
  if (input == NULL || output == NULL) {
    return NNACL_NULL_PTR;
  }
@ -29,6 +29,10 @@ int SpaceToDepthForNHWC(const float *input, float *output, const int *in_shape,
  if (h_start < 0 || h_start >= h_end || h_end > out_shape[1]) {
    return NNACL_PARAM_INVALID;
  }
+
+  const int8_t *int8_input_ptr = (int8_t *)input;
+  int8_t *int8_outoput_ptr = (int8_t *)output;
+
  int in_strides[C4NUM];
  ComputeStrides(in_shape, in_strides, shape_size);
  int out_strides[C4NUM];
@ -43,8 +47,9 @@ int SpaceToDepthForNHWC(const float *input, float *output, const int *in_shape,
        size_t in_offset_w = in_offset_h + k * block_size * in_strides[2];
        size_t out_offset_w = out_offset_h + k * out_strides[2];
        for (int l = 0; l < block_size; ++l) {
-          memcpy(output + out_offset_w + l * block_size * in_strides[2], input + in_offset_w + l * in_strides[1],
-                 block_size * in_strides[2] * sizeof(float));
+          memcpy(int8_outoput_ptr + (out_offset_w + l * block_size * in_strides[2]) * data_size,
+                 int8_input_ptr + (in_offset_w + l * in_strides[1]) * data_size,
+                 block_size * in_strides[2] * data_size);  // whole element offsets are scaled to bytes
        }
      }
    }
--- a/mindspore/lite/nnacl/base/space_to_depth_base.h
+++ b/mindspore/lite/nnacl/base/space_to_depth_base.h
@ -13,22 +13,18 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#ifndef MINDSPORE_LITE_NNACL_FP16_SLICE_FP16_H_
-#define MINDSPORE_LITE_NNACL_FP16_SLICE_FP16_H_
+#ifndef MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_BASE_SPACE_TO_DEPTH_BASE_H_
+#define MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_BASE_SPACE_TO_DEPTH_BASE_H_

 #include "nnacl/op_base.h"
-#include "nnacl/slice_parameter.h"
-#ifdef ENABLE_NEON
-#include <arm_neon.h>
-#endif

 #ifdef __cplusplus
 extern "C" {
 #endif
-void DoSliceFp16(const float16_t *input, float16_t *output, SliceParameter *param, int thread_id);
-void DoSliceFp16NoParallel(const float16_t *input, float16_t *output, SliceParameter *param);
+int SpaceToDepthForNHWC(const void *input, void *output, const int *in_shape, const int *out_shape, int shape_size,
+                        int block_size, int h_start, int h_end, int data_size);
 #ifdef __cplusplus
 }
 #endif

-#endif  // MINDSPORE_LITE_NNACL_FP16_SLICE_FP16_H_
+#endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_BASE_SPACE_TO_DEPTH_BASE_H_
--- a/mindspore/lite/nnacl/base/unsqueeze_base.h
+++ b/mindspore/lite/nnacl/base/unsqueeze_base.h
@ -14,11 +14,21 @@
 * limitations under the License.
 */

-#include "nnacl/fp32/unsqueeze_fp32.h"
-#include <string.h>
+#ifndef MINDSPORE_LITE_NNACL_BASE_UNSQUEEZE_BASE_H_
+#define MINDSPORE_LITE_NNACL_BASE_UNSQUEEZE_BASE_H_
+
+#include "nnacl/op_base.h"
 #include "nnacl/errorcode.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
 int Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, size_t data_size) {
  memcpy(output_ptr, input_ptr, data_size);
  return NNACL_OK;
 }
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MINDSPORE_LITE_NNACL_BASE_UNSQUEEZE_BASE_H_
--- a/mindspore/lite/nnacl/fp16/slice_fp16.c
+++ b/mindspore/lite/nnacl/fp16/slice_fp16.c
@ -1,70 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "nnacl/fp16/slice_fp16.h"
-#include <string.h>
-#include "nnacl/op_base.h"
-#include "nnacl/errorcode.h"
-
-void DoSliceFp16(const float16_t *input, float16_t *output, SliceParameter *param, int thread_id) {
-  int32_t out_dim1 = param->size_[1];
-  int32_t out_dim2 = param->size_[2];
-  int32_t out_dim3 = param->size_[3];
-  size_t out_stride2 = out_dim3;
-  size_t out_stride1 = out_stride2 * out_dim2;
-  size_t out_stride0 = out_stride1 * out_dim1;
-  size_t count_per_thread = UP_DIV(out_dim1, param->op_parameter_.thread_num_);
-  size_t thread_stride = thread_id * count_per_thread;
-  size_t copy_size = param->size_[3] * sizeof(float16_t);
-  size_t in_stride2 = param->shape_[3];
-  size_t in_stride1 = param->shape_[2] * in_stride2;
-  size_t in_stride0 = param->shape_[1] * in_stride1;
-  for (int i = 0; i < param->size_[0]; ++i) {
-    size_t out_offset0 = i * out_stride0;
-    size_t in_offset0 = (i + param->begin_[0]) * in_stride0 + param->begin_[3];
-    for (size_t j = 0; j < count_per_thread; ++j) {
-      size_t k = j + thread_stride;
-      if (k >= out_dim1) {
-        break;
-      }
-      size_t out_offset1 = k * out_stride1 + out_offset0;
-      size_t in_offset1 = (k + param->begin_[1]) * in_stride1 + in_offset0;
-      for (int l = 0; l < out_dim2; ++l) {
-        size_t out_offset = out_offset1 + l * out_stride2;
-        size_t in_offset = in_offset1 + (l + param->begin_[2]) * in_stride2;
-        memcpy(output + out_offset, input + in_offset, copy_size);
-      }
-    }
-  }
-}
-
-void DoSliceFp16NoParallel(const float16_t *input, float16_t *output, SliceParameter *param) {
-  size_t copy_size = param->size_[3] * sizeof(float16_t);
-  size_t in_stride2 = param->shape_[3];
-  size_t in_stride1 = param->shape_[2] * in_stride2;
-  size_t in_stride0 = param->shape_[1] * in_stride1;
-  size_t out_offset = 0;
-  for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) {
-    size_t in_offset0 = dim0 * in_stride0 + param->begin_[3];
-    for (size_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) {
-      size_t in_offset1 = dim1 * in_stride1 + in_offset0;
-      for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) {
-        size_t in_offset = in_offset1 + dim2 * in_stride2;
-        memcpy(output + out_offset, input + in_offset, copy_size);
-        out_offset += param->size_[3];
-      }
-    }
-  }
-}
--- a/mindspore/lite/nnacl/fp32/unsqueeze_fp32.h
+++ b/mindspore/lite/nnacl/fp32/unsqueeze_fp32.h
@ -1,41 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_LITE_NNACL_UNSQUEEZE_H_
-#define MINDSPORE_LITE_NNACL_UNSQUEEZE_H_
-
-#include "nnacl/op_base.h"
-
-#define UNSQUEEZE_DIMS_MAX_SIZE 4
-
-typedef struct UnsqueezeParameter {
-  // primitive parameter
-  OpParameter op_parameter_;
-  int dims_[UNSQUEEZE_DIMS_MAX_SIZE];
-
-  // other parameter
-  int num_dim_;
-} UnsqueezeParameter;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-int Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, size_t data_size);
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // MINDSPORE_LITE_NNACL_UNSQUEEZE_H_
--- a/mindspore/lite/nnacl/matmul_parameter.h
+++ b/mindspore/lite/nnacl/matmul_parameter.h
@ -37,6 +37,8 @@ typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2 } Ou
 typedef struct MatMulParameter {
  // Primitive parameter
  OpParameter op_parameter_;
+  bool has_bias_;
+
  // other parameter
  int row_;
  int col_;
@ -54,7 +56,6 @@ typedef struct MatMulParameter {
  int deep_;
  int deep_4_;
  int deep_16_;
-  bool has_bias_;
  int batch;
  bool a_transpose_; /* false :  row-major  */
  bool b_transpose_; /* true  :  col-major  */
--- a/mindspore/lite/nnacl/space_to_depth_parameter.h
+++ b/mindspore/lite/nnacl/space_to_depth_parameter.h
@ -13,8 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#ifndef MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_DEPTH_H_
-#define MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_DEPTH_H_
+#ifndef MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_SPACE_TO_DEPTH_PARAMETER_H_
+#define MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_SPACE_TO_DEPTH_PARAMETER_H_
 #include "nnacl/op_base.h"

 typedef struct SpaceToDepthParameter {
@ -22,13 +22,5 @@ typedef struct SpaceToDepthParameter {
  OpParameter op_parameter_;
  int32_t block_size_;
 } SpaceToDepthParameter;
-#ifdef __cplusplus
-extern "C" {
-#endif
-int SpaceToDepthForNHWC(const float *input, float *output, const int *in_shape, const int *out_shape, int shape_size,
-                        int block_size, int h_start, int h_end);
-#ifdef __cplusplus
-}
-#endif

-#endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_DEPTH_H_
+#endif  // MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_SPACE_TO_DEPTH_PARAMETER_H_
--- a/mindspore/lite/nnacl/unsqueeze_parameter.h
+++ b/mindspore/lite/nnacl/unsqueeze_parameter.h
@ -21,17 +21,13 @@
 #include <math.h>
 #include "nnacl/op_base.h"

-#define UNSQUEEZE_OFFSET_MAX_SIZE 4
+#define UNSQUEEZE_MAX_SIZE 4

 typedef struct UnSqueezeQuantArg {
-  int *input_sizes_;
-  int output_size_;
-  int **input_shapes_;
  int *output_shape_;
  float alpha;
  int axis_;
  size_t input_num_;
-  size_t output_dim_;
  QuantArg in_quant_args_;
  QuantArg out_quant_args_;
 } UnSqueezeQuantArg;
@ -39,20 +35,17 @@ typedef struct UnSqueezeQuantArg {
 typedef struct UnSqueezeParameter {
  // primitive parameter
  OpParameter op_parameter_;
-  int64_t axis_;
+  int dims_[UNSQUEEZE_MAX_SIZE];

  // shape correlative
  const int *in_shape_;
  const int *out_shape_;
-  int input_dim_;
-  int64_t offset_[UNSQUEEZE_OFFSET_MAX_SIZE];
-  int64_t in_offset_[UNSQUEEZE_OFFSET_MAX_SIZE];
+  int64_t offset_[UNSQUEEZE_MAX_SIZE];
+  int64_t axis_;

  // other parameter
  UnSqueezeQuantArg quant_arg;
  int thread_count_;
-  int thread_id_;
-  int offset_size_;
 } UnSqueezeParameter;

 #endif  // MINDSPORE_LITE_NNACL_UNSQUEEZE_PARAMETER_H_
--- a/mindspore/lite/src/ops/populate/space_to_depth_populate.cc
+++ b/mindspore/lite/src/ops/populate/space_to_depth_populate.cc
@ -18,7 +18,7 @@
 #include "src/common/common.h"
 #include "src/ops/primitive_c.h"
 #include "src/ops/populate/populate_register.h"
-#include "nnacl/fp32/space_to_depth_fp32.h"
+#include "nnacl/space_to_depth_parameter.h"

 namespace mindspore {
 namespace lite {
--- a/mindspore/lite/src/ops/populate/unsqueeze_populate.cc
+++ b/mindspore/lite/src/ops/populate/unsqueeze_populate.cc
@ -19,23 +19,20 @@
 #include "src/tensor.h"
 #include "src/ops/primitive_c.h"
 #include "src/ops/populate/populate_register.h"
-#include "nnacl/fp32/unsqueeze_fp32.h"
+#include "mindspore/lite/nnacl/unsqueeze_parameter.h"

 namespace mindspore {
 namespace lite {
-
 OpParameter *PopulateUnsqueezeParameter(const mindspore::lite::PrimitiveC *primitive) {
-  auto unsqueeze_attr =
-    reinterpret_cast<mindspore::lite::Unsqueeze *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
-  UnsqueezeParameter *unsqueeze_param = reinterpret_cast<UnsqueezeParameter *>(malloc(sizeof(UnsqueezeParameter)));
+  auto unsqueeze_attr = reinterpret_cast<lite::Unsqueeze *>(const_cast<lite::PrimitiveC *>(primitive));
+  UnSqueezeParameter *unsqueeze_param = reinterpret_cast<UnSqueezeParameter *>(malloc(sizeof(UnSqueezeParameter)));
  if (unsqueeze_param == nullptr) {
    MS_LOG(ERROR) << "malloc UnsqueezeParameter failed.";
    return nullptr;
  }
-  memset(unsqueeze_param, 0, sizeof(UnsqueezeParameter));
+  memset(unsqueeze_param, 0, sizeof(UnSqueezeParameter));
  unsqueeze_param->op_parameter_.type_ = primitive->Type();
  auto flatAxis = unsqueeze_attr->GetAxis();
-  unsqueeze_param->num_dim_ = flatAxis.size();
  int i = 0;
  for (auto iter = flatAxis.begin(); iter != flatAxis.end(); iter++) {
    unsqueeze_param->dims_[i++] = *iter;
--- a/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/pooling_base.cc
@ -14,19 +14,12 @@
 * limitations under the License.
 */
 #include "src/runtime/kernel/arm/base/pooling_base.h"
-#include <vector>
-#include "src/runtime/kernel/arm/fp32/pooling_fp32.h"
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
 #include "include/errorcode.h"
-#include "include/context.h"
 #include "src/ops/pooling.h"

-using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_Pooling;

 namespace mindspore::kernel {
 int PoolingBaseCPUKernel::SetQuantParam() {
--- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
@ -13,10 +13,11 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#include "src/runtime/kernel/arm/fp32/slice_fp32.h"
+#include "src/runtime/kernel/arm/base/slice_base.h"
 #include "src/kernel_registry.h"
-#include "nnacl/fp32/slice_fp32.h"
+#include "nnacl/base/slice_base.h"
 #include "src/ops/slice.h"
+#include "src/tensor.h"

 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
@ -63,11 +64,8 @@ int SliceCPUKernel::Init() {
 }

 int SliceCPUKernel::SliceParallelRun(int thread_id) {
-  const float *input_data = reinterpret_cast<const float *>(in_tensors_.at(0)->MutableData());
-  float *output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-  MS_ASSERT(input_data);
-  MS_ASSERT(output_data);
-  DoSlice(input_data, output_data, param_, thread_id);
+  DoSlice(in_tensors_.at(0)->data_c(), out_tensors_.at(0)->data_c(), param_, thread_id,
+          lite::DataTypeSize(in_tensors_.at(0)->data_type()));
  return RET_OK;
 }

@ -77,10 +75,10 @@ int SliceCPUKernel::Run() {
    MS_LOG(ERROR) << "PreProcess fail!ret: " << ret;
    return ret;
  }
-  const float *input_data = reinterpret_cast<const float *>(in_tensors_.at(0)->MutableData());
-  float *output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
+
  if (param_->size_[1] < op_parameter_->thread_num_) {
-    DoSliceNoParallel(input_data, output_data, param_);
+    DoSliceNoParallel(in_tensors_.at(0)->data_c(), out_tensors_.at(0)->data_c(), param_,
+                      lite::DataTypeSize(in_tensors_.at(0)->data_type()));
    return RET_OK;
  }
  ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_);
@ -92,5 +90,6 @@ int SliceCPUKernel::Run() {
 }

 REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Slice, LiteKernelCreator<SliceCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Slice, LiteKernelCreator<SliceCPUKernel>)
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Slice, LiteKernelCreator<SliceCPUKernel>)
 }  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.h
@ -13,8 +13,8 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SLICE_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SLICE_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SLICE_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SLICE_BASE_H_

 #include <vector>
 #include "src/lite_kernel.h"
@ -34,12 +34,12 @@ class SliceCPUKernel : public LiteKernel {
  int Init() override;
  int ReSize() override;
  int Run() override;
-  virtual int SliceParallelRun(int thread_id);
+
+ public:
+  int SliceParallelRun(int thread_id);

 protected:
  SliceParameter *param_;
 };
-int SliceLaunch(void *cdata, int task_id);
 }  // namespace mindspore::kernel
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SLICE_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_SLICE_BASE_H_
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/slice_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/slice_fp16.cc
@ -1,51 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "src/runtime/kernel/arm/fp16/slice_fp16.h"
-#include "src/runtime/kernel/arm/fp16/common_fp16.h"
-#include "src/kernel_registry.h"
-#include "nnacl/fp16/slice_fp16.h"
-
-using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_Slice;
-
-namespace mindspore::kernel {
-int SliceFp16CPUKernel::SliceParallelRun(int thread_id) {
-  DoSliceFp16(input_fp16_, output_fp16_, param_, thread_id);
-  return RET_OK;
-}
-
-int SliceFp16CPUKernel::Run() {
-  auto input_tensor = in_tensors_.at(0);
-  auto output_tensor = out_tensors_.at(0);
-
-  input_fp16_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
-  output_fp16_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
-
-  if (param_->size_[1] < op_parameter_->thread_num_) {
-    DoSliceFp16NoParallel(input_fp16_, output_fp16_, param_);
-    return RET_OK;
-  }
-  auto ret = ParallelLaunch(this->context_->thread_pool_, SliceLaunch, this, op_parameter_->thread_num_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "slice launch fail!ret: " << ret;
-  }
-  return ret;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Slice, LiteKernelCreator<SliceFp16CPUKernel>)
-}  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/slice_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/slice_fp16.h
@ -1,40 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SLICE_FP16_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SLICE_FP16_H_
-
-#include <vector>
-#include "src/runtime/kernel/arm/fp32/slice_fp32.h"
-
-namespace mindspore::kernel {
-class SliceFp16CPUKernel : public SliceCPUKernel {
- public:
-  SliceFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                     const mindspore::lite::PrimitiveC *primitive)
-      : SliceCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~SliceFp16CPUKernel() = default;
-
-  int Run() override;
-  int SliceParallelRun(int thread_id) override;
-
- protected:
-  float16_t *input_fp16_ = nullptr;
-  float16_t *output_fp16_ = nullptr;
-};
-}  // namespace mindspore::kernel
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_SLICE_FP16_H_
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
@ -19,7 +19,8 @@
 #include <vector>
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
-#include "nnacl/fp32/space_to_depth_fp32.h"
+#include "nnacl/space_to_depth_parameter.h"
+#include "nnacl/base/space_to_depth_base.h"
 #include "include/errorcode.h"
 #include "src/runtime/runtime_api.h"

@ -70,7 +71,7 @@ int SpaceToDepthCPUKernel::SpaceToDepth(int task_id) {
  MS_ASSERT(input_ptr_);
  MS_ASSERT(output_ptr_);
  auto ret = SpaceToDepthForNHWC(input_ptr_, output_ptr_, in_shape.data(), out_shape.data(), in_shape.size(),
-                                 param->block_size_, thread_offset, thread_offset + num_unit_thread);
+                                 param->block_size_, thread_offset, thread_offset + num_unit_thread, sizeof(float));
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "SpaceToDepth error task_id[" << task_id << "] error_code[" << ret << "]";
    return RET_ERROR;
@ -89,8 +90,8 @@ int SpaceToDepthRun(void *cdata, int task_id) {
 }

 int SpaceToDepthCPUKernel::Run() {
-  input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
-  output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
+  input_ptr_ = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
+  output_ptr_ = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
  if (in_tensors_.at(0)->format() == schema::Format::Format_NHWC) {
    auto ret = ParallelLaunch(this->context_->thread_pool_, SpaceToDepthRun, this, thread_h_num_);
    if (ret != RET_OK) {
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze_fp32.cc
@ -19,6 +19,7 @@
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
 #include "src/runtime/runtime_api.h"
+#include "nnacl/base/unsqueeze_base.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unsqueeze_fp32.h
@ -19,7 +19,7 @@
 #include <vector>
 #include "src/lite_kernel.h"
 #include "include/context.h"
-#include "nnacl/fp32/unsqueeze_fp32.h"
+#include "nnacl/unsqueeze_parameter.h"

 using mindspore::lite::InnerContext;

--- a/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/slice_int8.h
@ -18,7 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SLICE_INT8_H_

 #include <vector>
-#include "src/runtime/kernel/arm/fp32/slice_fp32.h"
+#include "src/runtime/kernel/arm/base/slice_base.h"
 #include "mindspore/lite/nnacl/int8/quantize.h"

 namespace mindspore::kernel {
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h
@ -21,7 +21,7 @@
 #include <string>
 #include "src/lite_kernel.h"
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
-#include "nnacl/fp32/space_to_depth_fp32.h"
+#include "nnacl/space_to_depth_parameter.h"

 namespace mindspore::kernel {
 class SpaceToDepthOpenCLKernel : public OpenCLKernel {
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h
@ -19,7 +19,7 @@

 #include <vector>
 #include "src/runtime/kernel/opencl/opencl_kernel.h"
-#include "nnacl/fp32/slice_fp32.h"
+#include "nnacl/base/slice_base.h"

 namespace mindspore::kernel {

--- a/mindspore/lite/test/runtest.sh
+++ b/mindspore/lite/test/runtest.sh
@ -32,7 +32,6 @@ echo 'run common ut tests'
 # test cases of FP32 OP
 ./lite-test --gtest_filter=TestFcFp32*
 ./lite-test --gtest_filter=TestConv1x1Fp32*
-./lite-test --gtest_filter=TestStrassenFp32*
 ## ./lite-test --gtest_filter=TestDeConvolutionFp32*

 # test cases of INT8 OP
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_depth_fp32_tests.cc
@ -18,7 +18,8 @@
 #include <memory>
 #include "src/common/log_adapter.h"
 #include "common/common_test.h"
-#include "mindspore/lite/nnacl/fp32/space_to_depth_fp32.h"
+#include "mindspore/lite/nnacl/space_to_depth_parameter.h"
+#include "mindspore/lite/nnacl/base/space_to_depth_base.h"
 #include "mindspore/lite/src/kernel_registry.h"
 #include "mindspore/lite/src/lite_kernel.h"

@ -39,7 +40,7 @@ TEST_F(SpaceToDepthTestFp32, SpaceToDepthTest1) {
  int out_shape[4] = {1, 2, 2, 4};
  int h_start = 0;
  int h_end = 2;
-  SpaceToDepthForNHWC((const float *)input, output, in_shape, out_shape, 4, 2, h_start, h_end);
+  SpaceToDepthForNHWC((const float *)input, output, in_shape, out_shape, 4, 2, h_start, h_end, sizeof(float));
  for (int i = 0; i < out_size; ++i) {
    std::cout << output[i] << " ";
  }
--- a/Show More
+++ b/Show More