!4670 [MS][LITE][Develop]add op_batchnorm_int8 and testcase

Merge pull request !4670 from songhonglei413/roi
5 years ago · dc685392b4
parent 0b3ab6b70f 3bb1cea3be
commit dc685392b4
13 changed files with 490 additions and 8 deletions
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.cc
@ -19,6 +19,8 @@
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
 #include "src/runtime/runtime_api.h"
+#include "src/runtime/kernel/arm/nnacl/batchnorm_parameter.h"
+#include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h
@ -21,6 +21,7 @@
 #include "src/lite_kernel.h"
 #include "include/context.h"
 #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h"
+#include "src/runtime/kernel/arm/nnacl/batchnorm_parameter.h"

 using mindspore::lite::Context;

--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc
@ -19,6 +19,8 @@
 #include "src/kernel_registry.h"
 #include "include/errorcode.h"
 #include "src/runtime/runtime_api.h"
+#include "src/runtime/kernel/arm/nnacl/batchnorm_parameter.h"
+#include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h"

 using mindspore::kernel::KERNEL_ARCH::kCPU;
 using mindspore::lite::KernelRegistrar;
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
@ -19,7 +19,7 @@

 #include <vector>
 #include "src/lite_kernel.h"
-#include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h"
+#include "src/runtime/kernel/arm/nnacl/batchnorm_parameter.h"

 namespace mindspore::kernel {
 class FusedBatchnormCPUKernel : public LiteKernel {
--- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc
@ -0,0 +1,168 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/runtime/kernel/arm/int8/batchnorm_int8.h"
+#include <math.h>
+#include "schema/model_generated.h"
+#include "src/kernel_registry.h"
+#include "include/errorcode.h"
+#include "src/runtime/runtime_api.h"
+#include "src/runtime/kernel/arm/nnacl/batchnorm_parameter.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_BatchNorm;
+
+namespace mindspore::kernel {
+// Releases the alpha/beta coefficient tables that InitConstTensor() allocated with malloc().
+BatchnormInt8CPUKernel::~BatchnormInt8CPUKernel() {
+  // free() is a no-op for nullptr, so the tables can be released unconditionally.
+  free(alpha_addr_);
+  free(beta_addr_);
+  alpha_addr_ = nullptr;
+  beta_addr_ = nullptr;
+}
+
+// Precomputes the per-channel affine coefficients consumed by BatchNormInt8().
+//
+// With (S, Z) the scale and zero point of a tensor and q its quantized value, and with
+// index 1 = input, 2 = mean, 3 = variance, 4 = output:
+//   tmp   = S4 * sqrt(eps + S3 * (q3 - Z3))
+//   alpha = S1 / tmp
+//   beta  = Z4 - alpha * Z1 - (S2 * (q2 - Z2)) / tmp
+//
+// Returns RET_OK on success. Returns RET_ERROR when a tensor, its data or its quantization
+// parameters are missing, when mean/variance hold fewer than channel_ elements, or when
+// memory allocation fails.
+int BatchnormInt8CPUKernel::InitConstTensor() {
+  if (in_tensors_.size() < 3 || out_tensors_.empty()) {
+    MS_LOG(ERROR) << "Batchnorm int8 needs 3 input tensors and 1 output tensor.";
+    return RET_ERROR;
+  }
+  auto input = in_tensors_[0];
+  auto mean = in_tensors_[1];
+  auto variance = in_tensors_[2];
+  auto output = out_tensors_[0];
+  if (input == nullptr || mean == nullptr || variance == nullptr || output == nullptr) {
+    MS_LOG(ERROR) << "Batchnorm int8 got a null tensor.";
+    return RET_ERROR;
+  }
+
+  // front() on an empty vector is undefined behaviour, so validate before reading.
+  const auto in_quant = input->GetQuantParams();
+  const auto mean_quant = mean->GetQuantParams();
+  const auto var_quant = variance->GetQuantParams();
+  const auto out_quant = output->GetQuantParams();
+  if (in_quant.empty() || mean_quant.empty() || var_quant.empty() || out_quant.empty()) {
+    MS_LOG(ERROR) << "Batchnorm int8 is missing quantization parameters.";
+    return RET_ERROR;
+  }
+
+  auto mean_ptr = reinterpret_cast<int8_t *>(mean->Data());
+  auto var_ptr = reinterpret_cast<int8_t *>(variance->Data());
+  if (mean_ptr == nullptr || var_ptr == nullptr) {
+    MS_LOG(ERROR) << "Batchnorm int8 mean/variance data is null.";
+    return RET_ERROR;
+  }
+
+  // The loop below touches channel_ entries of mean, variance, alpha and beta, so every
+  // one of them must hold at least that many elements.
+  const int channel = batchnorm_param_->channel_;
+  if (static_cast<int>(mean->ElementsNum()) < channel || static_cast<int>(variance->ElementsNum()) < channel) {
+    MS_LOG(ERROR) << "Batchnorm int8 mean/variance size is smaller than channel " << channel;
+    return RET_ERROR;
+  }
+
+  // Drop tables left over from an earlier call so repeated initialization does not leak.
+  free(alpha_addr_);
+  alpha_addr_ = nullptr;
+  free(beta_addr_);
+  beta_addr_ = nullptr;
+
+  alpha_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float)));
+  if (alpha_addr_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  beta_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float)));
+  if (beta_addr_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+
+  auto eps = batchnorm_param_->epsilon_;
+  auto zp_in = in_quant.front().zeroPoint;
+  auto zp_mean = mean_quant.front().zeroPoint;
+  auto zp_var = var_quant.front().zeroPoint;
+  auto zp_out = out_quant.front().zeroPoint;
+  auto s_in = in_quant.front().scale;
+  auto s_mean = mean_quant.front().scale;
+  auto s_var = var_quant.front().scale;
+  auto s_out = out_quant.front().scale;
+
+  // NOTE(review): a de-quantized variance below -eps makes sqrt() return NaN; confirm the
+  // converter guarantees non-negative variance.
+  for (int i = 0; i < channel; ++i) {
+    float tmp = s_out * sqrt(eps + s_var * (var_ptr[i] - zp_var));
+    float tmp_a = s_in / tmp;
+    float tmp_b = zp_out - tmp_a * zp_in - (s_mean * (mean_ptr[i] - zp_mean)) / tmp;
+    alpha_addr_[i] = tmp_a;
+    beta_addr_[i] = tmp_b;
+  }
+  return RET_OK;
+}
+
+// Derives channel_ (last input dimension) and unit_ (product of all other dimensions) from
+// the first input's shape, caps the thread count at the channel count and builds the
+// alpha/beta tables through InitConstTensor().
+//
+// Returns RET_OK on success, RET_ERROR when the input tensor or its shape is missing or
+// when InitConstTensor() fails.
+int BatchnormInt8CPUKernel::Init() {
+  if (in_tensors_.empty() || in_tensors_[0] == nullptr) {
+    MS_LOG(ERROR) << "Batchnorm int8 has no input tensor.";
+    return RET_ERROR;
+  }
+  auto input_shapes = in_tensors_[0]->shape();
+  if (input_shapes.empty()) {
+    // Without this guard size() - 1 wraps around and indexes far out of bounds.
+    MS_LOG(ERROR) << "Batchnorm int8 input shape is empty.";
+    return RET_ERROR;
+  }
+  const size_t last_dim = input_shapes.size() - 1;
+  batchnorm_param_->channel_ = input_shapes[last_dim];
+  batchnorm_param_->unit_ = 1;
+  for (size_t i = 0; i < last_dim; i++) {
+    batchnorm_param_->unit_ *= input_shapes[i];
+  }
+  // Work is split by channel, so more threads than channels would be idle.
+  batchnorm_param_->op_parameter_.thread_num_ =
+    MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_);
+
+  auto ret = InitConstTensor();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Batchnorm int8 InitConstTensor failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+// Recomputes unit_ (product of every input dimension except the last) after a shape change.
+// channel_ and the alpha/beta tables are left as Init() set them.
+//
+// Returns RET_OK on success, RET_ERROR when the input tensor or its shape is missing.
+int BatchnormInt8CPUKernel::ReSize() {
+  if (in_tensors_.empty() || in_tensors_[0] == nullptr) {
+    MS_LOG(ERROR) << "Batchnorm int8 has no input tensor.";
+    return RET_ERROR;
+  }
+  auto input_shapes = in_tensors_[0]->shape();
+  if (input_shapes.empty()) {
+    // size() - 1 would wrap around for an empty shape and the loop would read out of bounds.
+    MS_LOG(ERROR) << "Batchnorm int8 input shape is empty.";
+    return RET_ERROR;
+  }
+  batchnorm_param_->unit_ = 1;
+  const size_t last_dim = input_shapes.size() - 1;
+  for (size_t i = 0; i < last_dim; i++) {
+    batchnorm_param_->unit_ *= input_shapes[i];
+  }
+  return RET_OK;
+}
+
+// Processes the share of channels assigned to `task_id` by forwarding the cached buffers
+// and coefficient tables to BatchNormInt8(). Always returns RET_OK.
+int BatchnormInt8CPUKernel::DoExecute(int task_id) {
+  auto *param = batchnorm_param_;
+  BatchNormInt8(out_addr_, in_addr_, alpha_addr_, beta_addr_, task_id, param);
+  return RET_OK;
+}
+
+// Callback handed to LiteBackendParallelLaunch(): `cdata` is the BatchnormInt8CPUKernel
+// passed by Run(), and `task_id` selects the slice of work to execute.
+// Returns the status reported by DoExecute().
+int BatchNormInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
+  auto *self = reinterpret_cast<BatchnormInt8CPUKernel *>(cdata);
+  const auto status = self->DoExecute(task_id);
+  if (status == RET_OK) {
+    return RET_OK;
+  }
+  MS_LOG(ERROR) << "BatchnormRun error task_id[" << task_id << "] error_code[" << status << "]";
+  return status;
+}
+
+// Executes the kernel: calls Prepare(), caches the int8 input/output data pointers and
+// launches BatchNormInt8Run on thread_num_ tasks.
+// Returns RET_OK on success, otherwise the failing step's error code.
+int BatchnormInt8CPUKernel::Run() {
+  const auto prepare_status = Prepare();
+  if (prepare_status != RET_OK) {
+    MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_status;
+    return prepare_status;
+  }
+
+  // The worker callback reads these members, so set them before launching.
+  in_addr_ = reinterpret_cast<int8_t *>(in_tensors_.at(0)->Data());
+  out_addr_ = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data());
+
+  const int thread_count = batchnorm_param_->op_parameter_.thread_num_;
+  const int launch_status = LiteBackendParallelLaunch(BatchNormInt8Run, this, thread_count);
+  if (launch_status == RET_OK) {
+    return RET_OK;
+  }
+  MS_LOG(ERROR) << "BatchnormRun error error_code[" << launch_status << "]";
+  return launch_status;
+}
+
+// Factory registered for (kCPU, int8, BatchNorm): builds a BatchnormInt8CPUKernel and runs
+// its Init(). Returns the initialized kernel, or nullptr when `opParameter` is null,
+// allocation fails or Init() reports an error (the kernel is deleted in that case).
+kernel::LiteKernel *CpuBatchnormInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
+                                                  const std::vector<lite::tensor::Tensor *> &outputs,
+                                                  OpParameter *opParameter, const lite::Context *ctx,
+                                                  const kernel::KernelKey &desc,
+                                                  const mindspore::lite::PrimitiveC *primitive) {
+  MS_ASSERT(opParameter != nullptr);
+  MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm);
+  // MS_ASSERT may be compiled out, and the kernel dereferences the parameter in Init().
+  if (opParameter == nullptr) {
+    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+    return nullptr;
+  }
+  auto *kernel = new (std::nothrow) BatchnormInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new BatchnormInt8CPUKernel fail!";
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    delete kernel;
+    return nullptr;
+  }
+  return kernel;
+}
+
+REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_BatchNorm, CpuBatchnormInt8KernelCreator)
+}  // namespace mindspore::kernel
--- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.h
@ -0,0 +1,54 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_BATCHNORM_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_BATCHNORM_H_
+
+#include <vector>
+#include "src/lite_kernel.h"
+#include "include/context.h"
+#include "src/runtime/kernel/arm/nnacl/int8/batchnorm_int8.h"
+#include "src/runtime/kernel/arm/nnacl/batchnorm_parameter.h"
+
+using mindspore::lite::Context;
+
+namespace mindspore::kernel {
+// CPU kernel that evaluates BatchNorm on int8 tensors using per-channel float
+// coefficients (alpha/beta) that are precomputed from the quantization parameters.
+class BatchnormInt8CPUKernel : public LiteKernel {
+ public:
+  // `parameter` is reinterpreted as a BatchNormParameter; the kernel only keeps the pointer.
+  BatchnormInt8CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
+                         const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
+                         const mindspore::lite::PrimitiveC *primitive)
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive),
+        batchnorm_param_(reinterpret_cast<BatchNormParameter *>(parameter)) {}
+  // Frees the alpha/beta tables.
+  ~BatchnormInt8CPUKernel() override;
+
+  // Computes channel_/unit_ from the input shape and builds the coefficient tables.
+  int Init() override;
+  // Recomputes unit_ from the current input shape.
+  int ReSize() override;
+  // Runs the kernel across thread_num_ parallel tasks.
+  int Run() override;
+  // Allocates and fills alpha_addr_/beta_addr_.
+  int InitConstTensor();
+  // Executes the portion of the work belonging to task `tid`.
+  int DoExecute(int tid);
+
+ private:
+  int8_t *in_addr_ = nullptr;    // input data, set in Run(); not owned
+  int8_t *out_addr_ = nullptr;   // output data, set in Run(); not owned
+  float *alpha_addr_ = nullptr;  // per-channel multiplier, malloc'ed in InitConstTensor()
+  float *beta_addr_ = nullptr;   // per-channel offset, malloc'ed in InitConstTensor()
+  BatchNormParameter *batchnorm_param_;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_BATCHNORM_H_
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/batchnorm_parameter.h
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/batchnorm_parameter.h
@ -0,0 +1,29 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_BATCHNORM_PARAMETER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_BATCHNORM_PARAMETER_H_
+
+#include "nnacl/op_base.h"
+
+/* Parameters shared by the fp32 and int8 BatchNorm kernels. */
+typedef struct BatchNormParameter {
+  OpParameter op_parameter_; /* first member: kernels cast OpParameter* to this struct */
+  float epsilon_;            /* added to the variance before the square root */
+  int unit_;                 /* product of all input dimensions except the last */
+  int channel_;              /* size of the last input dimension */
+} BatchNormParameter;
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_BATCHNORM_PARAMETER_H_
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.c
@ -16,6 +16,7 @@

 #include "nnacl/fp32/batchnorm.h"
 #include <math.h>
+#include "nnacl/batchnorm_parameter.h"

 void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id,
               BatchNormParameter *param) {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h
@ -18,13 +18,7 @@
 #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP32_BATCHNORM_H_

 #include "nnacl/op_base.h"
-
-typedef struct BatchNormParameter {
-  OpParameter op_parameter_;
-  float epsilon_;
-  int unit_;
-  int channel_;
-} BatchNormParameter;
+#include "nnacl/batchnorm_parameter.h"

 #ifdef __cplusplus
 extern "C" {
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batchnorm_int8.c
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batchnorm_int8.c
@ -0,0 +1,31 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnacl/int8/batchnorm_int8.h"
+#include <math.h>
+#include "nnacl/batchnorm_parameter.h"
+
+// Applies the per-channel affine transform to an int8 tensor whose innermost dimension is
+// the channel:
+//   output[u * channel + c] = clamp(round(input[u * channel + c] * alpha[c] + beta[c]), -128, 127)
+// The task identified by task_id handles channels task_id, task_id + thread_num_, ...
+// Nothing is written when thread_num_ is not positive.
+void BatchNormInt8(int8_t *output_ptr, const int8_t *input_ptr, const float *alpha_ptr, const float *beta_ptr,
+                   int task_id, BatchNormParameter *param) {
+  const int channel = param->channel_;
+  const int unit = param->unit_;
+  const int step = param->op_parameter_.thread_num_;
+  if (step <= 0) {
+    // A zero step would loop forever and a negative one would index before the buffers.
+    return;
+  }
+  for (int c = task_id; c < channel; c += step) {
+    const float alpha = alpha_ptr[c];
+    const float beta = beta_ptr[c];
+    for (int u = 0; u < unit; u++) {
+      const int offset = u * channel + c;
+      // Clamp while still in floating point: converting an out-of-range or NaN value to an
+      // integer type is undefined behaviour.
+      float value = roundf(input_ptr[offset] * alpha + beta);
+      if (value > 127.0f) {
+        value = 127.0f;
+      } else if (!(value >= -128.0f)) { /* also true for NaN */
+        value = -128.0f;
+      }
+      output_ptr[offset] = (int8_t)value;
+    }
+  }
+}
--- a/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batchnorm_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batchnorm_int8.h
@ -0,0 +1,34 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_BATCHNORM_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_BATCHNORM_H_
+
+#include "nnacl/op_base.h"
+#include "nnacl/batchnorm_parameter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Int8 batch normalization over a tensor laid out with the channel as innermost dimension.
+//   output_ptr: destination buffer of unit_ * channel_ int8 values
+//   input_ptr:  source buffer with the same layout
+//   alpha_ptr:  per-channel multiplier (channel_ entries)
+//   beta_ptr:   per-channel offset (channel_ entries)
+//   task_id:    index of the calling task; channels are strided by op_parameter_.thread_num_
+//   param:      supplies channel_, unit_ and op_parameter_.thread_num_
+void BatchNormInt8(int8_t *output_ptr, const int8_t *input_ptr, const float *alpha_ptr, const float *beta_ptr,
+                   int task_id, BatchNormParameter *param);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_INT8_BATCHNORM_H_
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
@ -147,4 +147,63 @@ TEST_F(TestBatchnormFp32, FusedBNTest) {
  output0_tensor.SetData(nullptr);
  MS_LOG(INFO) << "TestFusedBathNormFp32 accuracy passed";
 }
+
+// Runs the fp32 BatchNorm kernel on a {1, 1, 6, 2} tensor with per-channel mean
+// {0.1, 0.6} and variance {3, 4}, and compares against precomputed reference values.
+TEST_F(TestBatchnormFp32, easyTest) {
+  std::vector<float> in_data = {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6};
+  std::vector<float> in_data1 = {0.1, 0.6};
+  std::vector<float> in_data2 = {3, 4};
+  std::vector<lite::tensor::Tensor *> inputs_tensor;
+  std::vector<lite::tensor::Tensor *> outputs_tensor;
+
+  // Value-initialize so no field of the parameter is read while indeterminate.
+  BatchNormParameter op_param = {};
+  op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm;
+  op_param.epsilon_ = 0.001f;
+
+  std::vector<int> shape = {1, 1, 6, 2};
+  lite::tensor::Tensor input0_tensor;
+  lite::tensor::Tensor input1_tensor;
+  lite::tensor::Tensor input2_tensor;
+  inputs_tensor.push_back(&input0_tensor);
+  inputs_tensor.push_back(&input1_tensor);
+  inputs_tensor.push_back(&input2_tensor);
+  input0_tensor.SetData(in_data.data());
+  input1_tensor.SetData(in_data1.data());
+  input2_tensor.SetData(in_data2.data());
+  input0_tensor.set_shape(shape);
+  input1_tensor.set_shape({2});
+  input2_tensor.set_shape({2});
+
+  std::vector<float> output(12);
+  std::vector<float> corr_out = {0.519529, 1.69979,  1.09678,  2.19973,  1.67404,  2.69966,
+                                 -0.63498, -2.29971, -1.21223, -2.79965, -1.78949, -3.29959};
+
+  lite::tensor::Tensor output0_tensor;
+  outputs_tensor.push_back(&output0_tensor);
+  output0_tensor.SetData(output.data());
+  output0_tensor.set_shape(shape);
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  lite::Context ctx;
+  ctx.thread_num_ = 1;
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
+  ASSERT_NE(kernel, nullptr);
+  // EXPECT (not ASSERT) so the SetData(nullptr) calls below still run on failure;
+  // 0 is the success code.
+  EXPECT_EQ(0, kernel->Run());
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
+    std::cout << output[i] << " ,";
+  }
+  std::cout << std::endl;
+  CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
+
+  // Detach the vector-owned buffers from the tensors before they go out of scope.
+  input0_tensor.SetData(nullptr);
+  input1_tensor.SetData(nullptr);
+  input2_tensor.SetData(nullptr);
+  output0_tensor.SetData(nullptr);
+  MS_LOG(INFO) << "TestBathNormFp32 accuracy passed";
+}
+
 }  // namespace mindspore
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/int8/batchnorm_int8_test.cc
@ -0,0 +1,107 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <iostream>
+#include "mindspore/core/utils/log_adapter.h"
+#include "common/common_test.h"
+#include "mindspore/lite/src/runtime/kernel/arm/nnacl/batchnorm_parameter.h"
+#include "mindspore/lite/src/runtime/kernel/arm/nnacl/int8/batchnorm_int8.h"
+#include "mindspore/lite/src/kernel_registry.h"
+#include "mindspore/lite/src/lite_kernel.h"
+
+namespace mindspore {
+// Fixture for the int8 BatchNorm kernel tests; adds nothing on top of CommonTest.
+class TestBatchnormInt8 : public mindspore::CommonTest {
+ public:
+  TestBatchnormInt8() = default;
+};
+
+// Runs the int8 BatchNorm kernel on a {1, 1, 6, 2} tensor with quantized mean/variance
+// and compares the quantized output against precomputed reference values.
+TEST_F(TestBatchnormInt8, BNTest) {
+  std::vector<int8_t> in_data = {11, 41, 21, 51, 31, 61, -11, -41, -21, -51, -31, -61};
+  std::vector<int8_t> in_data1 = {4, 14};
+  std::vector<int8_t> in_data2 = {29, 39};
+  std::vector<lite::tensor::Tensor *> inputs_tensor;
+  std::vector<lite::tensor::Tensor *> outputs_tensor;
+
+  // Value-initialize so no field of the parameter is read while indeterminate.
+  BatchNormParameter op_param = {};
+  op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm;
+  op_param.epsilon_ = 0.001f;
+
+  std::vector<int> shape = {1, 1, 6, 2};
+
+  // Quantization parameters: input, mean (input 1), variance (input 2) and output.
+  lite::tensor::QuantArg input_quant_arg;
+  input_quant_arg.scale = 0.1;
+  input_quant_arg.zeroPoint = 1;
+  lite::tensor::QuantArg input_quant_arg_1;
+  input_quant_arg_1.scale = 0.05;
+  input_quant_arg_1.zeroPoint = 2;
+  lite::tensor::QuantArg input_quant_arg_2;
+  input_quant_arg_2.scale = 0.1;
+  input_quant_arg_2.zeroPoint = -1;
+  lite::tensor::QuantArg output_quant_arg;
+  output_quant_arg.scale = 1;
+  output_quant_arg.zeroPoint = 0;
+
+  lite::tensor::Tensor input0_tensor;
+  lite::tensor::Tensor input1_tensor;
+  lite::tensor::Tensor input2_tensor;
+  inputs_tensor.push_back(&input0_tensor);
+  inputs_tensor.push_back(&input1_tensor);
+  inputs_tensor.push_back(&input2_tensor);
+  input0_tensor.SetData(in_data.data());
+  input1_tensor.SetData(in_data1.data());
+  input2_tensor.SetData(in_data2.data());
+  input0_tensor.set_shape(shape);
+  input1_tensor.set_shape({2});
+  input2_tensor.set_shape({2});
+  input0_tensor.AddQuantParam(input_quant_arg);
+  input1_tensor.AddQuantParam(input_quant_arg_1);
+  input2_tensor.AddQuantParam(input_quant_arg_2);
+
+  std::vector<int8_t> output(12);
+  // std::vector<int8_t> corr_out1 = {5, 17, 11, 22, 17, 27, -6, -23, -12, -28, -18, -33};
+  std::vector<int8_t> corr_out = {1, 2, 1, 2, 2, 3, -1, -2, -1, -3, -2, -3};
+
+  lite::tensor::Tensor output0_tensor;
+  outputs_tensor.push_back(&output0_tensor);
+  output0_tensor.SetData(output.data());
+  output0_tensor.set_shape(shape);
+  output0_tensor.AddQuantParam(output_quant_arg);
+
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt8, schema::PrimitiveType_BatchNorm};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  lite::Context ctx;
+  ctx.thread_num_ = 3;
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
+  ASSERT_NE(kernel, nullptr);
+  // EXPECT (not ASSERT) so the SetData(nullptr) calls below still run on failure;
+  // 0 is the success code.
+  EXPECT_EQ(0, kernel->Run());
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
+    printf("%d, ", output[i]);
+  }
+  std::cout << std::endl;
+  CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
+
+  // Detach the vector-owned buffers from the tensors before they go out of scope.
+  input0_tensor.SetData(nullptr);
+  input1_tensor.SetData(nullptr);
+  input2_tensor.SetData(nullptr);
+  output0_tensor.SetData(nullptr);
+  MS_LOG(INFO) << "TestBatchnormInt8 accuracy passed";
+}
+
+}  // namespace mindspore