From 280b84b7aa565d0ecc801ae628569044fa2a2f34 Mon Sep 17 00:00:00 2001
From: fuzhiye
Date: Fri, 15 Jan 2021 11:00:44 +0800
Subject: [PATCH] extend stack to support more than 4 dimensions

---
 .../{fp32/stack_fp32.h => base/stack_base.c}  | 27 +++---
 .../{fp16/stack_fp16.h => base/stack_base.h}  | 15 ++--
 mindspore/lite/nnacl/fp16/stack_fp16.c        | 54 -----------
 mindspore/lite/nnacl/fp32/stack_fp32.c        | 72 ---------------
 mindspore/lite/nnacl/op_base.h                |  1 +
 mindspore/lite/nnacl/winograd_utils.h         |  2 -
 .../src/runtime/kernel/arm/base/stack_base.cc | 90 +++++++++++++++++++
 .../{fp32/stack_fp32.h => base/stack_base.h}  | 23 ++---
 .../src/runtime/kernel/arm/fp16/stack_fp16.cc | 33 +++----
 .../src/runtime/kernel/arm/fp16/stack_fp16.h  | 12 +--
 .../arm/fp32/convolution_delegate_fp32.cc     |  2 +-
 .../src/runtime/kernel/arm/fp32/stack_fp32.cc | 81 -----------------
 .../kernel/arm/fp32/stack_fp32_test.cc        | 13 ++-
 13 files changed, 147 insertions(+), 278 deletions(-)
 rename mindspore/lite/nnacl/{fp32/stack_fp32.h => base/stack_base.c} (50%)
 rename mindspore/lite/nnacl/{fp16/stack_fp16.h => base/stack_base.h} (67%)
 delete mode 100644 mindspore/lite/nnacl/fp16/stack_fp16.c
 delete mode 100644 mindspore/lite/nnacl/fp32/stack_fp32.c
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
 rename mindspore/lite/src/runtime/kernel/arm/{fp32/stack_fp32.h => base/stack_base.h} (56%)
 delete mode 100644 mindspore/lite/src/runtime/kernel/arm/fp32/stack_fp32.cc

diff --git a/mindspore/lite/nnacl/fp32/stack_fp32.h b/mindspore/lite/nnacl/base/stack_base.c
similarity index 50%
rename from mindspore/lite/nnacl/fp32/stack_fp32.h
rename to mindspore/lite/nnacl/base/stack_base.c
index 5c3b1b1853..3ccddb2f86 100644
--- a/mindspore/lite/nnacl/fp32/stack_fp32.h
+++ b/mindspore/lite/nnacl/base/stack_base.c
@@ -13,21 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_NNACL_FP32_STACK_H_
-#define MINDSPORE_LITE_NNACL_FP32_STACK_H_
+#include "nnacl/base/stack_base.h"
 
-#include "nnacl/op_base.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-void DoStack(const float *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
-             float *output);
-void DoStackInt32(const int32_t *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis,
-                  int32_t *output);
-void DoStackOneInput(const int8_t *input, int8_t *output, size_t data_size);
-#ifdef __cplusplus
+void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size) {
+  size_t in_offset = 0;
+  size_t out_offset = 0;
+  for (size_t i = 0; i < outter_size; ++i) {
+    for (size_t j = 0; j < input_num; ++j) {
+      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
+      out_offset += copy_size;
+    }
+    in_offset += copy_size;
+  }
 }
-#endif
-
-#endif  // MINDSPORE_LITE_NNACL_FP32_STACK_H_
diff --git a/mindspore/lite/nnacl/fp16/stack_fp16.h b/mindspore/lite/nnacl/base/stack_base.h
similarity index 67%
rename from mindspore/lite/nnacl/fp16/stack_fp16.h
rename to mindspore/lite/nnacl/base/stack_base.h
index 83062a7925..0ccfa7a5e3 100644
--- a/mindspore/lite/nnacl/fp16/stack_fp16.h
+++ b/mindspore/lite/nnacl/base/stack_base.h
@@ -13,21 +13,18 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
-#define MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
+#ifndef MINDSPORE_LITE_NNACL_STACK_H_
+#define MINDSPORE_LITE_NNACL_STACK_H_
+#include <string.h>
 #include "nnacl/op_base.h"
-#ifdef ENABLE_NEON
-#include <arm_neon.h>
-#endif
+#include "nnacl/stack_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
-void DoStackFp16(const float16_t *const *inputs, size_t input_num, int *in_shape, size_t shape_size, int axis,
-                 float16_t *output);
+void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size);
 #ifdef __cplusplus
 }
 #endif
-
-#endif  // MINDSPORE_LITE_NNACL_FP16_STACK_FP16_H_
+#endif  // MINDSPORE_LITE_NNACL_STACK_H_
diff --git a/mindspore/lite/nnacl/fp16/stack_fp16.c b/mindspore/lite/nnacl/fp16/stack_fp16.c
deleted file mode 100644
index 4172053f91..0000000000
--- a/mindspore/lite/nnacl/fp16/stack_fp16.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "nnacl/fp16/stack_fp16.h"
-#include "nnacl/common_func.h"
-
-size_t Fp16GetStackCopyNum(int axis, int *in_shape, size_t shape_size) {
-  size_t one_input_size = 1;
-  for (size_t i = 0; i < shape_size; ++i) {
-    one_input_size *= in_shape[i];
-  }
-  int in_strides[4];
-  ComputeStrides(in_shape, in_strides, shape_size);
-
-  size_t copy_num = axis > 0 ? in_strides[axis - 1] : one_input_size;
-  return copy_num;
-}
-
-size_t Fp16GetStackPreAxisCount2(const int *in_shape, int axis) {
-  size_t pre_axis_count = 1;
-  for (size_t i = 0; i < axis; ++i) {
-    pre_axis_count *= in_shape[i];
-  }
-  return pre_axis_count;
-}
-
-void DoStackFp16(const float16_t *const *inputs, size_t input_num, int *in_shape, size_t shape_size, int axis,
-                 float16_t *output) {
-  size_t copy_num = Fp16GetStackCopyNum(axis, in_shape, shape_size);
-  size_t copy_size = copy_num * sizeof(float16_t);
-  size_t pre_axis_count = Fp16GetStackPreAxisCount2(in_shape, axis);
-  size_t in_offset = 0;
-  size_t out_offset = 0;
-  for (size_t i = 0; i < pre_axis_count; ++i) {
-    for (size_t j = 0; j < input_num; ++j) {
-      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
-      out_offset += copy_num;
-    }
-    in_offset += copy_num;
-  }
-}
diff --git a/mindspore/lite/nnacl/fp32/stack_fp32.c b/mindspore/lite/nnacl/fp32/stack_fp32.c
deleted file mode 100644
index a35dec49c0..0000000000
--- a/mindspore/lite/nnacl/fp32/stack_fp32.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "nnacl/fp32/stack_fp32.h" -#include "nnacl/common_func.h" - -size_t GetStackCopyNum(int axis, const int *in_shape, size_t shape_size) { - size_t one_input_size = 1; - for (size_t i = 0; i < shape_size; ++i) { - one_input_size *= in_shape[i]; - } - int in_strides[4]; - ComputeStrides(in_shape, in_strides, shape_size); - - size_t copy_num = axis > 0 ? in_strides[axis - 1] : one_input_size; - return copy_num; -} - -size_t GetStackPreAxisCount(const int *in_shape, int axis) { - size_t pre_axis_count = 1; - for (size_t i = 0; i < axis; ++i) { - pre_axis_count *= in_shape[i]; - } - return pre_axis_count; -} - -void DoStack(const float *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis, - float *output) { - size_t copy_num = GetStackCopyNum(axis, in_shape, shape_size); - size_t copy_size = copy_num * sizeof(float); - size_t pre_axis_count = GetStackPreAxisCount(in_shape, axis); - size_t in_offset = 0; - size_t out_offset = 0; - for (size_t i = 0; i < pre_axis_count; ++i) { - for (size_t j = 0; j < input_num; ++j) { - memcpy(output + out_offset, inputs[j] + in_offset, copy_size); - out_offset += copy_num; - } - in_offset += copy_num; - } -} - -void DoStackInt32(const int32_t *const *inputs, size_t input_num, const int *in_shape, size_t shape_size, int axis, - int32_t *output) { - size_t copy_num = GetStackCopyNum(axis, in_shape, shape_size); - size_t copy_size = copy_num * sizeof(int32_t); - size_t pre_axis_count = GetStackPreAxisCount(in_shape, axis); - size_t in_offset = 0; - size_t out_offset = 0; - for (size_t i = 0; i < pre_axis_count; ++i) { - for (size_t j = 0; j < input_num; ++j) { - memcpy(output + out_offset, inputs[j] + in_offset, copy_size); - out_offset += copy_num; - } - in_offset += copy_num; - } -} - -void DoStackOneInput(const int8_t *input, int8_t *output, size_t data_size) { memcpy(output, input, data_size); } diff --git a/mindspore/lite/nnacl/op_base.h b/mindspore/lite/nnacl/op_base.h index 24d7cdecbb..f9cb3d6c44 100644 --- a/mindspore/lite/nnacl/op_base.h +++ b/mindspore/lite/nnacl/op_base.h @@ -60,6 +60,7 @@ #define kNHWC_C 3 #define kInputSize1 2 #define kInputSize2 3 +#define MAX_LEN 256 typedef enum LiteDataType { kDataTypeFloat, diff --git a/mindspore/lite/nnacl/winograd_utils.h b/mindspore/lite/nnacl/winograd_utils.h index 34f170a753..baccdef5d2 100644 --- a/mindspore/lite/nnacl/winograd_utils.h +++ b/mindspore/lite/nnacl/winograd_utils.h @@ -23,8 +23,6 @@ #include "nnacl/conv_parameter.h" #include "nnacl/op_base.h" -#define MAX_LEN 256 - #ifdef __cplusplus extern "C" { #endif diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc new file mode 100644 index 0000000000..a7ebac6b46 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc @@ -0,0 +1,90 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/base/stack_base.h"
+#include <vector>
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "nnacl/base/stack_base.h"
+#include "nnacl/stack_parameter.h"
+#include "include/errorcode.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Stack;
+
+namespace mindspore::kernel {
+static int GetCopyNum(const std::vector<int> &in_shape, int axis, int n_dim) {
+  int copy_num = 1;
+  if (axis > 0) {
+    for (int j = n_dim - 1; j > axis - 1; j--) {
+      copy_num *= in_shape[j];
+    }
+  } else {
+    for (int i = 0; i < n_dim; ++i) {
+      copy_num *= in_shape[i];
+    }
+  }
+  return copy_num;
+}
+
+static size_t GetOutterSize(const std::vector<int> &in_shape, int axis) {
+  size_t outter_size = 1;
+  for (int i = 0; i < axis; ++i) {
+    outter_size *= in_shape[i];
+  }
+  return outter_size;
+}
+
+int StackBaseCPUKernel::ReSize() {
+  auto param = reinterpret_cast<StackParameter *>(op_parameter_);
+  auto input0_shape = in_tensors_.front()->shape();
+  axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
+  auto input_nums = in_tensors_.size();
+  if (input_nums == 1) {
+    copy_size_ = in_tensors_.front()->Size();
+  } else {
+    MS_ASSERT(input_nums > 1);
+    copy_size_ = GetCopyNum(input0_shape, axis_, input0_shape.size()) * data_type_size_;
+    outter_size_ = GetOutterSize(input0_shape, axis_);
+  }
+  return RET_OK;
+}
+
+int StackBaseCPUKernel::Init() {
+  auto input0_tensor = in_tensors_.front();
+  data_type_size_ = input0_tensor->Size() / input0_tensor->ElementsNum();
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int StackBaseCPUKernel::Run() {
+  size_t inputs_num = in_tensors_.size();
+  char **all_inputs = static_cast<char **>(context_->allocator->Malloc(inputs_num * sizeof(char *)));
+  for (size_t j = 0; j < inputs_num; ++j) {
+    all_inputs[j] = reinterpret_cast<char *>(in_tensors_.at(j)->data_c());
+  }
+  auto output_data = reinterpret_cast<char *>(out_tensors_.at(0)->data_c());
+  Stack(all_inputs, output_data, in_tensors_.size(), copy_size_, outter_size_);
+  context_->allocator->Free(all_inputs);
+  return RET_OK;
+}
+
+REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackBaseCPUKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/stack_fp32.h b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
similarity index 56%
rename from mindspore/lite/src/runtime/kernel/arm/fp32/stack_fp32.h
rename to mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
index 32ff146bac..d78ecf76a9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/stack_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.h
@@ -13,21 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
-#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
 #include <vector>
 #include "src/lite_kernel.h"
+#include "nnacl/stack_parameter.h"
 
+using mindspore::lite::InnerContext;
 namespace mindspore::kernel {
-class StackCPUKernel : public LiteKernel {
+class StackBaseCPUKernel : public LiteKernel {
  public:
-  StackCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
-                 const mindspore::lite::PrimitiveC *primitive)
+  StackBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
+                     const mindspore::lite::PrimitiveC *primitive)
       : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
-
-  ~StackCPUKernel() = default;
+  ~StackBaseCPUKernel() override = default;
 
   int Init() override;
   int ReSize() override;
@@ -35,7 +36,9 @@ class StackCPUKernel : public LiteKernel {
 
  protected:
   int axis_ = 0;
+  size_t data_type_size_ = 0;
+  size_t copy_size_ = 0;
+  size_t outter_size_ = 1;
 };
 }  // namespace mindspore::kernel
-
-#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_STACK_H_
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_BASE_STACK_BASE_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
index 2429b5aa82..9864b33397 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
@@ -21,7 +21,7 @@
 #include "include/errorcode.h"
 #include "src/runtime/kernel/arm/fp16/common_fp16.h"
 #include "nnacl/fp16/cast_fp16.h"
-#include "nnacl/fp16/stack_fp16.h"
+#include "nnacl/base/stack_base.h"
 
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
@@ -29,25 +29,18 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Stack;
 
 namespace mindspore::kernel {
-int StackFp16CPUKernel::Init() {
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-  return ReSize();
-}
-
 void StackFp16CPUKernel::InitMallocFlags() {
   malloc_buffers_.resize(in_tensors_.size());
   for (size_t i = 0; i < in_tensors_.size(); ++i) {
     malloc_buffers_.at(i) = in_tensors_.at(i)->data_type() == kNumberTypeFloat32;
   }
-  malloc_out = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
+  malloc_out_ = out_tensors_.at(0)->data_type() == kNumberTypeFloat32;
 }
 
 int StackFp16CPUKernel::MallocAssignBuffer() {
   buffers_.resize(in_tensors_.size(), nullptr);
   for (size_t i = 0; i < in_tensors_.size(); ++i) {
-    buffers_.at(i) = ConvertInputFp32toFp16(in_tensors_.at(i), context_);
+    buffers_.at(i) = reinterpret_cast<char *>(ConvertInputFp32toFp16(in_tensors_.at(i), context_));
     if (buffers_.at(i) == nullptr) {
       return RET_ERROR;
     }
@@ -68,33 +61,33 @@ void StackFp16CPUKernel::FreeBuffer() {
       buffers_.at(i) = nullptr;
     }
   }
-  if (malloc_out && out_buffer_ != nullptr) {
+  if (malloc_out_ && out_buffer_ != nullptr) {
     context_->allocator->Free(out_buffer_);
     out_buffer_ = nullptr;
   }
 }
-
-int StackFp16CPUKernel::Run() {
-  size_t inputs_num = in_tensors_.size();
-  auto input0 = in_tensors_.at(0);
-  if (inputs_num == 1) {
-    memcpy(out_tensors_.at(0)->MutableData(), input0->MutableData(), input0->Size());
+int StackFp16CPUKernel::Init() {
+  data_type_size_ = sizeof(float16_t);
+  if (!InferShapeDone()) {
     return RET_OK;
   }
+  return ReSize();
+}
+
+int StackFp16CPUKernel::Run() {
   InitMallocFlags();
   auto ret = MallocAssignBuffer();
   if (ret != RET_OK) {
     FreeBuffer();
     return ret;
   }
-  auto input0_shape = input0->shape();
-  DoStackFp16(buffers_.data(), inputs_num, input0_shape.data(), input0_shape.size(), axis_, out_buffer_);
+  Stack(buffers_.data(), reinterpret_cast<char *>(out_buffer_), in_tensors_.size(), copy_size_, outter_size_);
   // if output tensor is fp32, we need to transform
-  if (malloc_out) {
+  if (malloc_out_) {
     auto out_tensor = out_tensors_.at(0);
     Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->MutableData()), out_tensor->ElementsNum());
   }
-
   FreeBuffer();
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
index 776a87d240..e234f418e4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.h
@@ -18,16 +18,16 @@
 
 #include <vector>
 #include "src/lite_kernel.h"
-#include "src/runtime/kernel/arm/fp32/stack_fp32.h"
+#include "src/runtime/kernel/arm/base/stack_base.h"
 
 namespace mindspore::kernel {
-class StackFp16CPUKernel : public StackCPUKernel {
+class StackFp16CPUKernel : public StackBaseCPUKernel {
  public:
   StackFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                      const mindspore::lite::PrimitiveC *primitive)
-      : StackCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~StackFp16CPUKernel() = default;
+      : StackBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+  ~StackFp16CPUKernel() override = default;
 
   int Init() override;
   int Run() override;
@@ -38,9 +38,9 @@ class StackFp16CPUKernel : public StackCPUKernel {
 
  private:
   std::vector<bool> malloc_buffers_;
-  std::vector<float16_t *> buffers_;
+  std::vector<char *> buffers_;
   float16_t *out_buffer_;
-  bool malloc_out;
+  bool malloc_out_;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
index 7b5b7f3f97..d914834c80 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
@@ -114,7 +114,7 @@ int ConvolutionDelegateCPUKernel::Init() {
 }
 
 int ConvolutionDelegateCPUKernel::ReSize() {
-  // Updata shape info of input and output
+  // Update shape info of input and output
   SetInputOutputShapeInfo(reinterpret_cast<ConvParameter *>(op_parameter_), in_tensors_.front(), out_tensors_.front(), context_);
   if (conv_kernel_ == nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/stack_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/stack_fp32.cc
deleted file mode 100644
index cd86c086f7..0000000000
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/stack_fp32.cc
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "src/runtime/kernel/arm/fp32/stack_fp32.h"
-#include <vector>
-#include "schema/model_generated.h"
-#include "src/kernel_registry.h"
-#include "nnacl/fp32/stack_fp32.h"
-#include "nnacl/stack_parameter.h"
-#include "include/errorcode.h"
-
-using mindspore::lite::KernelRegistrar;
-using mindspore::lite::RET_ERROR;
-using mindspore::lite::RET_OK;
-using mindspore::schema::PrimitiveType_Stack;
-
-namespace mindspore::kernel {
-int StackCPUKernel::ReSize() {
-  StackParameter *param = reinterpret_cast<StackParameter *>(op_parameter_);
-  auto input0_shape = in_tensors_.at(0)->shape();
-  axis_ = param->axis_ < 0 ? param->axis_ + input0_shape.size() + 1 : param->axis_;
-  return RET_OK;
-}
-
-int StackCPUKernel::Init() {
-  if (!InferShapeDone()) {
-    return RET_OK;
-  }
-
-  return ReSize();
-}
-
-int StackCPUKernel::Run() {
-  size_t inputs_num = in_tensors_.size();
-  auto input0 = in_tensors_.at(0);
-  if (inputs_num == 1) {
-    auto *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->MutableData());
-    MS_ASSERT(output_data);
-    auto *input_data = reinterpret_cast<int8_t *>(input0->MutableData());
-    MS_ASSERT(input_data);
-    DoStackOneInput(input_data, output_data, input0->Size());
-    return RET_OK;
-  }
-  auto input0_shape = in_tensors_.at(0)->shape();
-  if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32 || in_tensors_.at(0)->data_type() == kNumberTypeFloat) {
-    auto *output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-    MS_ASSERT(output_data);
-    float *inputs[inputs_num];
-    for (size_t i = 0; i < inputs_num; ++i) {
-      inputs[i] = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
-      MS_ASSERT(inputs[i]);
-    }
-    DoStack(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
-  } else {
-    auto *output_data = reinterpret_cast<int32_t *>(out_tensors_.at(0)->MutableData());
-    MS_ASSERT(output_data);
-    int32_t *inputs[inputs_num];
-    for (size_t i = 0; i < inputs_num; ++i) {
-      inputs[i] = reinterpret_cast<int32_t *>(in_tensors_.at(i)->MutableData());
-      MS_ASSERT(inputs[i]);
-    }
-    DoStackInt32(inputs, inputs_num, input0_shape.data(), input0_shape.size(), axis_, output_data);
-  }
-  return RET_OK;
-}
-
-REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Stack, LiteKernelCreator<StackCPUKernel>)
-REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_Stack, LiteKernelCreator<StackCPUKernel>)
-}  // namespace mindspore::kernel
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/stack_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/stack_fp32_test.cc
index 50a1340863..8e3f48f56a 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/stack_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/stack_fp32_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "mindspore/lite/nnacl/fp32/stack_fp32.h"
+#include "mindspore/lite/nnacl/base/stack_base.h"
 
 namespace mindspore {
 class StackTestFp32 : public mindspore::CommonTest {
@@ -26,16 +26,15 @@ TEST_F(StackTestFp32, StackTest1) {
   float input0[6] = {1, 2, 3, 10, 20, 30};
   float input1[6] = {4, 5, 6, 40, 50, 60};
   float input2[6] = {7, 8, 9, 70, 80, 90};
-  float *input[3];
-  input[0] = input0;
-  input[1] = input1;
-  input[2] = input2;
+  char *input[3];
+  input[0] = reinterpret_cast<char *>(input0);
+  input[1] = reinterpret_cast<char *>(input1);
+  input[2] = reinterpret_cast<char *>(input2);
   std::vector<int> shape = {2, 3};
-  int axis = 2;
   constexpr int kOutSize = 18;
   float expect_out[kOutSize] = {1, 4, 7, 2, 5, 8, 3, 6, 9, 10, 40, 70, 20, 50, 80, 30, 60, 90};
   float output[kOutSize];
-  DoStack(input, 3, shape.data(), shape.size(), axis, output);
+  Stack(input, reinterpret_cast<char *>(output), 3, 4, 6);
   for (float i : output) {
     std::cout << i << " ";
   }
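
Reviewer note: the patch replaces the per-dtype DoStack/DoStackFp16/DoStackInt32 kernels (whose `int in_strides[4]` capped inputs at 4 dimensions) with a single byte-oriented Stack() driven by a copy_size/outter_size decomposition, so any rank and any fixed-size dtype work. Below is a minimal standalone C sketch, not part of the patch, showing how that path fits together: Stack() is copied from nnacl/base/stack_base.c, while compute_copy_size() and compute_outer_size() are hypothetical helpers that mirror what GetCopyNum(), GetOutterSize(), and StackBaseCPUKernel::ReSize() compute; the main() driver and helper names are illustrative only.

#include <stdio.h>
#include <string.h>

/* Copied from nnacl/base/stack_base.c: interleave input_num blocks of
 * copy_size bytes from each input, repeated outter_size times. */
void Stack(char **inputs, char *output, size_t input_num, size_t copy_size, size_t outter_size) {
  size_t in_offset = 0;
  size_t out_offset = 0;
  for (size_t i = 0; i < outter_size; ++i) {
    for (size_t j = 0; j < input_num; ++j) {
      memcpy(output + out_offset, inputs[j] + in_offset, copy_size);
      out_offset += copy_size;
    }
    in_offset += copy_size;
  }
}

/* Hypothetical helper mirroring GetCopyNum() * data_type_size_: bytes per
 * memcpy = product of the dims at and after `axis`, times the element size. */
static size_t compute_copy_size(const int *shape, int n_dim, int axis, size_t elem_size) {
  size_t n = elem_size;
  for (int i = axis; i < n_dim; ++i) {
    n *= (size_t)shape[i];
  }
  return n;
}

/* Hypothetical helper mirroring GetOutterSize(): product of dims before `axis`. */
static size_t compute_outer_size(const int *shape, int axis) {
  size_t n = 1;
  for (int i = 0; i < axis; ++i) {
    n *= (size_t)shape[i];
  }
  return n;
}

int main(void) {
  /* Stack two 2x3 fp32 tensors along axis 1; the output shape is 2x2x3. */
  float a[6] = {1, 2, 3, 4, 5, 6};
  float b[6] = {10, 20, 30, 40, 50, 60};
  char *inputs[2] = {(char *)a, (char *)b};
  int shape[2] = {2, 3};
  size_t copy_size = compute_copy_size(shape, 2, 1, sizeof(float)); /* 3 floats = 12 bytes */
  size_t outer_size = compute_outer_size(shape, 1);                 /* 2 rows before the axis */
  float out[12];
  Stack(inputs, (char *)out, 2, copy_size, outer_size);
  for (int i = 0; i < 12; ++i) {
    printf("%g ", out[i]); /* prints: 1 2 3 10 20 30 4 5 6 40 50 60 */
  }
  printf("\n");
  return 0;
}

Because the shape only enters through these two products, the same Stack() serves fp32, int32, and (via the fp16 subclass, which sets data_type_size_ = sizeof(float16_t)) fp16 tensors, which is what lets the patch delete the three dtype-specific implementations.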