From e2c4e74ecdae4b172fa44ba5e6ce15562c54ff36 Mon Sep 17 00:00:00 2001
From: sunsuodong <sunsuodong@huawei.com>
Date: Wed, 9 Sep 2020 09:40:25 +0800
Subject: [PATCH] Refactor SpaceToBatchND and add int8 kernel

---
 mindspore/lite/nnacl/fp32/space_to_batch.c    |  20 ++-
 mindspore/lite/nnacl/fp32/space_to_batch.h    |  12 +-
 .../lite/nnacl/int8/space_to_batch_int8.c     |  91 +++++++++++++
 .../lite/nnacl/int8/space_to_batch_int8.h     |  30 +++++
 .../runtime/kernel/arm/fp32/space_to_batch.cc | 126 ++++++------------
 .../runtime/kernel/arm/fp32/space_to_batch.h  |  10 +-
 .../kernel/arm/int8/space_to_batch_int8.cc    |  84 ++++++++++++
 .../kernel/arm/int8/space_to_batch_int8.h     |  36 +++++
 .../arm/fp32/space_to_batch_fp32_tests.cc     |  25 ++--
 .../arm/int8/space_to_batch_int8_tests.cc     |  57 ++++++++
 10 files changed, 368 insertions(+), 123 deletions(-)
 create mode 100644 mindspore/lite/nnacl/int8/space_to_batch_int8.c
 create mode 100644 mindspore/lite/nnacl/int8/space_to_batch_int8.h
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h
 create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc

diff --git a/mindspore/lite/nnacl/fp32/space_to_batch.c b/mindspore/lite/nnacl/fp32/space_to_batch.c
index 0cd665de02..589f29080f 100644
--- a/mindspore/lite/nnacl/fp32/space_to_batch.c
+++ b/mindspore/lite/nnacl/fp32/space_to_batch.c
@@ -15,17 +15,14 @@
  */
 #include "nnacl/fp32/space_to_batch.h"
 #include "nnacl/arithmetic_common.h"
-#include "nnacl/errorcode.h"
-#include "nnacl/op_base.h"
 
-void DoSpaceToBatchNHWC(const float *input, float *output, SpaceToBatchParameter *param, int *in_shape,
-                        int *out_shape) {
+void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int *in_shape, int *out_shape) {
   int out_dim0 = out_shape[0];
   int out_dim1 = out_shape[1];
   int out_dim2 = out_shape[2];
   int copy_num = out_shape[3];
-  int block_w = param->block_sizes_[1];
-  int block_h = param->block_sizes_[0];
+  int block_w = block_sizes[1];
+  int block_h = block_sizes[0];
   int in_strides[4];
   ComputeStrides(in_shape, in_strides, 4);
   int out_strides[4];
@@ -48,8 +45,7 @@ void DoSpaceToBatchNHWC(const float *input, float *output, SpaceToBatchParameter
   }
 }
 
-void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape,
-                              const float *pedding_h_data, const float *pedding_w_data) {
+void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape) {
   int in_h = in_shape[1];
   int in_w = in_shape[2];
   int in_c = in_shape[3];
@@ -67,13 +63,13 @@ void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape,
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_offset0 = i * in_strides[0];
     for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) {
-        memcpy(output + out_offset, pedding_h_data, ped_h_size);
+        memset(output + out_offset, 0, ped_h_size);
         out_offset += ped_h_num;
     }
     for (int j = 0; j < in_h; ++j) {
       size_t in_offset1 = in_offset0 + j * in_strides[1];
       for (int pad_w_left = 0; pad_w_left < padding[2]; ++pad_w_left) {
-        memcpy(output + out_offset, pedding_w_data, ped_w_size);
+        memset(output + out_offset, 0, ped_w_size);
         out_offset += out_c;
       }
       for (int k = 0; k < in_w; ++k) {
@@ -82,12 +78,12 @@ void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape,
         out_offset += in_c;
       }
       for (int pad_w_right = 0; pad_w_right < padding[3]; ++pad_w_right) {
-        memcpy(output + out_offset, pedding_w_data, ped_w_size);
+        memset(output + out_offset, 0, ped_w_size);
         out_offset += out_c;
       }
     }
     for (int pad_h_bottom = 0; pad_h_bottom < padding[1]; ++pad_h_bottom) {
-      memcpy(output + out_offset, pedding_h_data, ped_h_size);
+      memset(output + out_offset, 0, ped_h_size);
       out_offset += ped_h_num;
     }
   }
diff --git a/mindspore/lite/nnacl/fp32/space_to_batch.h b/mindspore/lite/nnacl/fp32/space_to_batch.h
index 19d941c42b..31c91e5c1f 100644
--- a/mindspore/lite/nnacl/fp32/space_to_batch.h
+++ b/mindspore/lite/nnacl/fp32/space_to_batch.h
@@ -17,21 +17,21 @@
 #define MINDSPORE_LITE_SRC_BACKEND_ARM_NNACL_FP32_SPACE_TO_BATCH_H_
 #include "nnacl/op_base.h"
 
-#define SPACE_TO_BATCH_BLOCK_SIZES_SIZE 2
-#define SPACE_TO_BATCH_PADDINGS_SIZE 4
-
 typedef struct SpaceToBatchParameter {
   OpParameter op_parameter_;
   bool need_paddings_;
   int block_sizes_[4];
   int paddings_[4];
+  int input_shape_[4];
+  int output_shape_[4];
+  int padded_in_shape_[4];
+  int padded_input_element_num;
 } SpaceToBatchParameter;
 #ifdef __cplusplus
 extern "C" {
 #endif
-void DoSpaceToBatchNHWC(const float *input, float *output, SpaceToBatchParameter *param, int *in_shape, int *out_shape);
-void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape,
-                               const float *pedding_h_data, const float *pedding_w_data);
+void DoSpaceToBatchNHWC(const float *input, float *output, int *block_sizes, int *in_shape, int *out_shape);
+void DoSpaceToBatchPaddingNHWC(const float *input, float *output, int *in_shape, int *padding, int *out_shape);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/lite/nnacl/int8/space_to_batch_int8.c b/mindspore/lite/nnacl/int8/space_to_batch_int8.c
new file mode 100644
index 0000000000..f86049d730
--- /dev/null
+++ b/mindspore/lite/nnacl/int8/space_to_batch_int8.c
@@ -0,0 +1,91 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "nnacl/int8/space_to_batch_int8.h"
+#include "nnacl/arithmetic_common.h"
+
+void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_sizes, int *in_shape,
+                        int *out_shape) {
+  int out_dim0 = out_shape[0];
+  int out_dim1 = out_shape[1];
+  int out_dim2 = out_shape[2];
+  int copy_num = out_shape[3];
+  int block_w = block_sizes[1];
+  int block_h = block_sizes[0];
+  int in_strides[4];
+  ComputeStrides(in_shape, in_strides, 4);
+  int out_strides[4];
+  ComputeStrides(out_shape, out_strides, 4);
+  size_t copy_size = copy_num * sizeof(int8_t);
+  size_t out_offset = 0;
+  for (int n = 0; n < out_dim0; ++n) {
+    int in_n = n % in_shape[0];
+    int32_t stride_w = (n / in_shape[0]) % block_w;
+    int32_t stride_h = (n / in_shape[0]) / block_w;
+    size_t in_offset0 = in_n * in_strides[0];
+    for (int h = 0; h < out_dim1; ++h) {
+      size_t in_offset1 = in_offset0 + (h * block_h + stride_h) * in_strides[1];
+      for (int w = 0; w < out_dim2; ++w) {
+        size_t in_offset2 = in_offset1 + (w * block_w + stride_w) * in_strides[2];
+        memcpy(output + out_offset, input + in_offset2, copy_size);
+        out_offset += copy_num;
+      }
+    }
+  }
+}
+
+void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, int *padding, int *out_shape) {
+  int in_h = in_shape[1];
+  int in_w = in_shape[2];
+  int in_c = in_shape[3];
+  int out_w = out_shape[2];
+  int out_c = out_shape[3];
+  size_t ped_h_num = out_w * out_c;
+  size_t ped_h_size = ped_h_num * sizeof(int8_t);
+  size_t ped_w_size = out_c * sizeof(int8_t);
+  size_t out_offset = 0;
+  int in_strides[4];
+  ComputeStrides(in_shape, in_strides, 4);
+  int out_strides[4];
+  ComputeStrides(out_shape, out_strides, 4);
+  size_t copy_size = in_c * sizeof(int8_t);
+  for (int i = 0; i < in_shape[0]; ++i) {
+    size_t in_offset0 = i * in_strides[0];
+    for (int pad_h_top = 0; pad_h_top < padding[0]; ++pad_h_top) {
+        memset(output + out_offset, 0, ped_h_size);
+        out_offset += ped_h_num;
+    }
+    for (int j = 0; j < in_h; ++j) {
+      size_t in_offset1 = in_offset0 + j * in_strides[1];
+      for (int pad_w_left = 0; pad_w_left < padding[2]; ++pad_w_left) {
+        memset(output + out_offset, 0, ped_w_size);
+        out_offset += out_c;
+      }
+      for (int k = 0; k < in_w; ++k) {
+        size_t in_offset2 = in_offset1 + k * in_strides[2];
+        memcpy(output + out_offset, input + in_offset2, copy_size);
+        out_offset += in_c;
+      }
+      for (int pad_w_right = 0; pad_w_right < padding[3]; ++pad_w_right) {
+        memset(output + out_offset, 0, ped_w_size);
+        out_offset += out_c;
+      }
+    }
+    for (int pad_h_bottom = 0; pad_h_bottom < padding[1]; ++pad_h_bottom) {
+      memset(output + out_offset, 0, ped_h_size);
+      out_offset += ped_h_num;
+    }
+  }
+}
diff --git a/mindspore/lite/nnacl/int8/space_to_batch_int8.h b/mindspore/lite/nnacl/int8/space_to_batch_int8.h
new file mode 100644
index 0000000000..799091cdae
--- /dev/null
+++ b/mindspore/lite/nnacl/int8/space_to_batch_int8.h
@@ -0,0 +1,30 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_NNACL_INT8_SPACE_TO_BATCH_INT8_H_
+#define MINDSPORE_LITE_NNACL_INT8_SPACE_TO_BATCH_INT8_H_
+
+#include "nnacl/op_base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void DoSpaceToBatchNHWCInt8(const int8_t *input, int8_t *output, int *block_sizes, int *in_shape, int *out_shape);
+void DoSpaceToBatchPaddingNHWCInt8(const int8_t *input, int8_t *output, int *in_shape, int *padding, int *out_shape);
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MINDSPORE_LITE_NNACL_INT8_SPACE_TO_BATCH_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc
index dcccada248..2c1d82a975 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.cc
@@ -15,100 +15,52 @@
  */
 #include "src/runtime/kernel/arm/fp32/space_to_batch.h"
 #include <vector>
-#include "schema/ops_generated.h"
-#include "schema/model_generated.h"
 #include "src/kernel_registry.h"
 #include "nnacl/fp32/space_to_batch.h"
-#include "nnacl/errorcode.h"
 #include "include/errorcode.h"
-#include "src/runtime/runtime_api.h"
 
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_FORMAT_ERR;
 using mindspore::lite::RET_OK;
-using mindspore::lite::RET_OP_EXECUTE_FAILURE;
 using mindspore::schema::PrimitiveType_SpaceToBatch;
 using mindspore::schema::PrimitiveType_SpaceToBatchND;
 
 namespace mindspore::kernel {
-namespace {
-size_t EnumElement(int *shape, int n_dims) {
-  size_t total = 1;
-  for (int i = 0; i < n_dims; i++) {
-    total *= shape[i];
-  }
-  return total;
-}
-}  // namespace
-
 int SpaceToBatchCPUKernel::Init() {
-  SpaceToBatchParameter *param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);
-  for (int i = 0; i < SPACE_TO_BATCH_PADDINGS_SIZE; ++i) {
-    if (param->paddings_[i] != 0) {
-      param->need_paddings_ = true;
-      break;
-    }
-  }
-
   if (!InferShapeDone()) {
     return RET_OK;
   }
   return ReSize();
 }
 
-void SpaceToBatchCPUKernel::FreeTmpBuffer() {
-  if (pedding_h_data_ != nullptr) {
-    context_->allocator->Free(pedding_h_data_);
-    pedding_h_data_ = nullptr;
-  }
-  if (pedding_w_data_ != nullptr) {
-    context_->allocator->Free(pedding_w_data_);
-    pedding_w_data_ = nullptr;
-  }
-  if (pedding_input_ != nullptr) {
-    context_->allocator->Free(pedding_input_);
-    pedding_input_ = nullptr;
-  }
-}
-
 int SpaceToBatchCPUKernel::ReSize() {
-  if (in_tensors_[0]->GetFormat() != schema::Format::Format_NHWC) {
+  auto input_tensor = in_tensors_.at(0);
+  auto output_tensor = out_tensors_.at(0);
+  if (input_tensor->GetFormat() != schema::Format_NHWC) {
     MS_LOG(ERROR) << "space_to_batch only support NHWC now!";
     return RET_FORMAT_ERR;
   }
-  FreeTmpBuffer();
   SpaceToBatchParameter *param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);
-  if (!param->need_paddings_) {
-    return RET_OK;
-  }
-  auto input = in_tensors_[0];
-  auto in_shape = input->shape();
-  padded_in_shape_ = in_shape;
-  padded_in_shape_[1] = in_shape[1] + param->paddings_[0] + param->paddings_[1];
-  padded_in_shape_[2] = in_shape[2] + param->paddings_[2] + param->paddings_[3];
-  auto num_elements_padded = EnumElement(padded_in_shape_.data(), in_shape.size());
-  auto output_shape = out_tensors_[0]->shape();
-  auto pedding_h_size = padded_in_shape_[2] * output_shape[3] * sizeof(float);
-  pedding_h_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(pedding_h_size));
-  if (pedding_h_data_ == nullptr) {
-    MS_LOG(ERROR) << "malloc pedding h data fail!";
-    return RET_ERROR;
+
+  for (size_t i = 0; i < DIMENSION_4D; i++) {
+    param->input_shape_[i] = input_tensor->shape().at(i);
+    param->output_shape_[i] = output_tensor->shape().at(i);
   }
-  auto pedding_w_size = output_shape[3] * sizeof(float);
-  pedding_w_data_ = reinterpret_cast<float *>(context_->allocator->Malloc(pedding_w_size));
-  if (pedding_w_data_ == nullptr) {
-    MS_LOG(ERROR) << "malloc pedding w data fail!";
-    FreeTmpBuffer();
-    return RET_ERROR;
+  for (int i = 0; i < DIMENSION_4D; ++i) {
+    if (param->paddings_[i] != 0) {
+      param->need_paddings_ = true;
+      break;
+    }
   }
-  pedding_input_ = reinterpret_cast<float *>(context_->allocator->Malloc(num_elements_padded * sizeof(float)));
-  if (pedding_input_ == nullptr) {
-    MS_LOG(ERROR) << "malloc pedding buffer fail!";
-    return RET_ERROR;
+  if (param->need_paddings_) {
+    param->padded_in_shape_[kNHWC_N] = input_tensor->shape().at(kNHWC_N);
+    param->padded_in_shape_[kNHWC_H] = input_tensor->shape().at(kNHWC_H) + param->paddings_[0] + param->paddings_[1];
+    param->padded_in_shape_[kNHWC_W] = input_tensor->shape().at(kNHWC_W) + param->paddings_[2] + param->paddings_[3];
+    param->padded_in_shape_[kNHWC_C] = input_tensor->shape().at(kNHWC_C);
+    param->padded_input_element_num = param->padded_in_shape_[kNHWC_N] * param->padded_in_shape_[kNHWC_H] *
+                                      param->padded_in_shape_[kNHWC_W] * param->padded_in_shape_[kNHWC_C];
   }
-  memset(pedding_h_data_, 0, pedding_h_size);
-  memset(pedding_w_data_, 0, pedding_w_size);
   return RET_OK;
 }
 
@@ -118,23 +70,34 @@ int SpaceToBatchCPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
     return ret;
   }
-  auto input = in_tensors_[0];
-  auto output = out_tensors_[0];
-  const float *input_ptr_ = reinterpret_cast<const float *>(input->MutableData());
-  float *output_ptr_ = reinterpret_cast<float *>(output->MutableData());
+  auto input_tensor = in_tensors_.at(0);
+  auto output_tensor = out_tensors_.at(0);
+  auto input_ptr = reinterpret_cast<const float *>(input_tensor->MutableData());
+  auto output_ptr = reinterpret_cast<float *>(output_tensor->MutableData());
   SpaceToBatchParameter *param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);
-  auto in_shape = input->shape();
-  auto out_shape = output->shape();
+
   if (param->need_paddings_) {
-    DoSpaceToBatchPaddingNHWC(input_ptr_, pedding_input_, in_shape.data(), param->paddings_, padded_in_shape_.data(),
-                              pedding_h_data_, pedding_w_data_);
-    DoSpaceToBatchNHWC(pedding_input_, output_ptr_, param, padded_in_shape_.data(), out_shape.data());
-    return RET_OK;
+    padded_input_ = context_->allocator->Malloc(param->padded_input_element_num * sizeof(float));
+    if (padded_input_ == nullptr) {
+      MS_LOG(ERROR) << "Memory allocation failed";
+      return RET_ERROR;
+    }
+    auto padded_input = reinterpret_cast<float *>(padded_input_);
+    DoSpaceToBatchPaddingNHWC(input_ptr, padded_input, param->input_shape_, param->paddings_, param->padded_in_shape_);
+    DoSpaceToBatchNHWC(padded_input, output_ptr, param->block_sizes_, param->padded_in_shape_, param->output_shape_);
+    FreeTmpBuffer();
   } else {
-    DoSpaceToBatchNHWC(input_ptr_, output_ptr_, param, in_shape.data(), out_shape.data());
-    return RET_OK;
+    DoSpaceToBatchNHWC(input_ptr, output_ptr, param->block_sizes_, param->input_shape_, param->output_shape_);
   }
-}  // namespace mindspore::kernel
+  return RET_OK;
+}
+
+void SpaceToBatchCPUKernel::FreeTmpBuffer() {
+  if (padded_input_ != nullptr) {
+    context_->allocator->Free(padded_input_);
+    padded_input_ = nullptr;
+  }
+}
 
 kernel::LiteKernel *CpuSpaceToBatchFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                      const std::vector<lite::Tensor *> &outputs, OpParameter *param,
@@ -149,12 +112,11 @@ kernel::LiteKernel *CpuSpaceToBatchFp32KernelCreator(const std::vector<lite::Ten
     MS_LOG(ERROR) << "new SpaceToBatchCPUKernel fail!";
     return nullptr;
   }
-
   auto ret = kernel->Init();
   if (ret != RET_OK) {
-    delete kernel;
     MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_
                   << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(param->type_));
+    delete kernel;
     return nullptr;
   }
   return kernel;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h
index f84149c3ee..5debd97e05 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch.h
@@ -27,18 +27,16 @@ class SpaceToBatchCPUKernel : public LiteKernel {
                         const mindspore::lite::PrimitiveC *primitive)
       : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
 
-  ~SpaceToBatchCPUKernel() { FreeTmpBuffer(); }
+  ~SpaceToBatchCPUKernel() {}
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
- private:
+ protected:
+  size_t EnumElement(int *shape, int n_dims);
   void FreeTmpBuffer();
-  float *pedding_input_ = nullptr;
-  float *pedding_h_data_ = nullptr;
-  float *pedding_w_data_ = nullptr;
-  std::vector<int> padded_in_shape_;
+  void *padded_input_ = nullptr;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc
new file mode 100644
index 0000000000..44491fcb01
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.cc
@@ -0,0 +1,84 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/int8/space_to_batch_int8.h"
+#include "src/kernel_registry.h"
+#include "nnacl/fp32/space_to_batch.h"
+#include "nnacl/int8/space_to_batch_int8.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_SpaceToBatch;
+using mindspore::schema::PrimitiveType_SpaceToBatchND;
+
+namespace mindspore::kernel {
+int SpaceToBatchInt8CPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare fail!ret: " << ret;
+    return ret;
+  }
+  auto input_tensor = in_tensors_.at(0);
+  auto output_tensor = out_tensors_.at(0);
+  auto input_ptr = reinterpret_cast<const int8_t *>(input_tensor->MutableData());
+  auto output_ptr = reinterpret_cast<int8_t *>(output_tensor->MutableData());
+  SpaceToBatchParameter *param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);
+
+  if (param->need_paddings_) {
+    padded_input_ = context_->allocator->Malloc(param->padded_input_element_num * sizeof(int8_t));
+    if (padded_input_ == nullptr) {
+      MS_LOG(ERROR) << "Memory allocation failed";
+      return RET_ERROR;
+    }
+    auto padded_input = reinterpret_cast<int8_t *>(padded_input_);
+    DoSpaceToBatchPaddingNHWCInt8(input_ptr, padded_input, param->input_shape_, param->paddings_,
+                                  param->padded_in_shape_);
+    DoSpaceToBatchNHWCInt8(padded_input, output_ptr, param->block_sizes_, param->padded_in_shape_,
+                           param->output_shape_);
+    FreeTmpBuffer();
+  } else {
+    DoSpaceToBatchNHWCInt8(input_ptr, output_ptr, param->block_sizes_, param->input_shape_, param->output_shape_);
+  }
+  return RET_OK;
+}
+
+kernel::LiteKernel *CpuSpaceToBatchInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                                     const std::vector<lite::Tensor *> &outputs,
+                                                     OpParameter *param, const lite::Context *ctx,
+                                                     const kernel::KernelKey &desc,
+                                                     const mindspore::lite::PrimitiveC *primitive) {
+  if (param == nullptr) {
+    MS_LOG(ERROR) << "Input param is nullptr!";
+    return nullptr;
+  }
+  auto *kernel = new (std::nothrow) SpaceToBatchInt8CPUKernel(param, inputs, outputs, ctx, primitive);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new SpaceToBatchInt8CPUKernel fail!";
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << param->name_
+                  << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(param->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_SpaceToBatch, CpuSpaceToBatchInt8KernelCreator)
+REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_SpaceToBatchND, CpuSpaceToBatchInt8KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h
new file mode 100644
index 0000000000..aaf0c9cbe6
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/space_to_batch_int8.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SPACE_TO_BATCH_INT8_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SPACE_TO_BATCH_INT8_H_
+
+#include <vector>
+#include "src/runtime/kernel/arm/fp32/space_to_batch.h"
+
+namespace mindspore::kernel {
+class SpaceToBatchInt8CPUKernel : public SpaceToBatchCPUKernel {
+ public:
+  SpaceToBatchInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                            const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
+                            const mindspore::lite::PrimitiveC *primitive)
+      : SpaceToBatchCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
+
+  ~SpaceToBatchInt8CPUKernel() {}
+
+  int Run() override;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_SPACE_TO_BATCH_INT8_H_
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_batch_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_batch_fp32_tests.cc
index dbacfd1d72..9f83c503b8 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_batch_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/space_to_batch_fp32_tests.cc
@@ -38,7 +38,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest4) {
   SpaceToBatchParameter param;
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 1;
-  DoSpaceToBatchNHWC(input.data(), out, &param, in_shape.data(), out_shape.data());
+  DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
   for (int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
@@ -56,7 +56,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest5) {
   SpaceToBatchParameter param;
   param.block_sizes_[0] = 1;
   param.block_sizes_[1] = 2;
-  DoSpaceToBatchNHWC(input.data(), out, &param, in_shape.data(), out_shape.data());
+  DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
   for (int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
@@ -74,7 +74,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest6) {
   SpaceToBatchParameter param;
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 2;
-  DoSpaceToBatchNHWC(input.data(), out, &param, in_shape.data(), out_shape.data());
+  DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
   for (int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
@@ -96,7 +96,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest7) {
   SpaceToBatchParameter param;
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 2;
-  DoSpaceToBatchNHWC(input.data(), out, &param, in_shape.data(), out_shape.data());
+  DoSpaceToBatchNHWC(input.data(), out, param.block_sizes_, in_shape.data(), out_shape.data());
   for (int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
@@ -115,10 +115,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest8) {
   std::vector<int> in_shape = {1, 4, 4, 2};
   std::vector<int> out_shape = {1, 5, 5, 2};
   std::vector<int> padding = {0, 1, 0, 1};
-  std::vector<float> pedding_h(10, 0);
-  std::vector<float> pedding_w(2, 0);
-  DoSpaceToBatchPaddingNHWC(input.data(), out, in_shape.data(), padding.data(), out_shape.data(), pedding_h.data(),
-                            pedding_w.data());
+  DoSpaceToBatchPaddingNHWC(input.data(), out, in_shape.data(), padding.data(), out_shape.data());
   for (int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
@@ -138,10 +135,7 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest9) {
   std::vector<int> in_shape = {1, 4, 4, 2};
   std::vector<int> out_shape = {1, 6, 6, 2};
   std::vector<int> padding = {1, 1, 1, 1};
-  std::vector<float> pedding_h(12, 0);
-  std::vector<float> pedding_w(2, 0);
-  DoSpaceToBatchPaddingNHWC(input.data(), out, in_shape.data(), padding.data(), out_shape.data(), pedding_h.data(),
-                            pedding_w.data());
+  DoSpaceToBatchPaddingNHWC(input.data(), out, in_shape.data(), padding.data(), out_shape.data());
   for (int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
@@ -163,14 +157,11 @@ TEST_F(SpaceToBatchTestFp32, SpaceToBatchTest10) {
   std::vector<int> pedding_out_shape = {1, 6, 6, 2};
   std::vector<int> out_shape = {4, 3, 3, 2};
   std::vector<int> padding = {1, 1, 1, 1};
-  std::vector<float> pedding_h(12, 0);
-  std::vector<float> pedding_w(2, 0);
-  DoSpaceToBatchPaddingNHWC(input.data(), pedding_out, in_shape.data(), padding.data(), pedding_out_shape.data(),
-                            pedding_h.data(), pedding_w.data());
+  DoSpaceToBatchPaddingNHWC(input.data(), pedding_out, in_shape.data(), padding.data(), pedding_out_shape.data());
   SpaceToBatchParameter param;
   param.block_sizes_[0] = 2;
   param.block_sizes_[1] = 2;
-  DoSpaceToBatchNHWC(pedding_out, out, &param, pedding_out_shape.data(), out_shape.data());
+  DoSpaceToBatchNHWC(pedding_out, out, param.block_sizes_, pedding_out_shape.data(), out_shape.data());
   for (int i = 0; i < kOutSize; ++i) {
     std::cout << out[i] << " ";
   }
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc
new file mode 100644
index 0000000000..90bfa43b86
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/space_to_batch_int8_tests.cc
@@ -0,0 +1,57 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <iostream>
+#include "common/common_test.h"
+#include "nnacl/fp32/space_to_batch.h"
+#include "mindspore/lite/src/kernel_registry.h"
+
+namespace mindspore {
+class SpaceToBatchTestInt8 : public mindspore::CommonTest {
+ public:
+  SpaceToBatchTestInt8() {}
+};
+
+TEST_F(SpaceToBatchTestInt8, test1) {  // int8 SpaceToBatchND via the registered CPU kernel, with padding
+  lite::Tensor in_tensor(kNumberTypeInt8, {1, 2, 2, 1});
+  lite::Tensor out_tensor(kNumberTypeInt8, {4, 2, 2, 1});
+  int8_t input_data[] = {1, 2, 3, 4};
+  int8_t output_data[16] = {0};
+  in_tensor.SetData(input_data);
+  out_tensor.SetData(output_data);
+  std::vector<lite::Tensor *> inputs = {&in_tensor};
+  std::vector<lite::Tensor *> outputs = {&out_tensor};
+
+  SpaceToBatchParameter parameter = {{}, false, {2, 2}, {1, 1, 1, 1}};  // block 2x2, padding 1 on every side
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_SpaceToBatchND};
+
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+
+  auto ctx = std::make_shared<lite::Context>();
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc, nullptr);
+  ASSERT_NE(kernel, nullptr);
+
+  auto ret = kernel->Run();
+  EXPECT_EQ(0, ret);
+
+  int8_t expect[] = {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0};  // 4 output batches of 2x2x1
+  for (int i = 0; i < 16; ++i) {  // compare all 16 output elements, not only the first two batches
+    EXPECT_EQ(output_data[i], expect[i]);
+  }
+  in_tensor.SetData(nullptr);  // detach the stack buffers from the tensors
+  out_tensor.SetData(nullptr);
+}
+}  // namespace mindspore