From f0eef679235b681aec66478f6bb1c6d554276859 Mon Sep 17 00:00:00 2001
From: yangruoqi713 <yangruoqi@huawei.com>
Date: Thu, 13 Aug 2020 20:17:13 +0800
Subject: [PATCH] [MS][LITE] fix bug of arm cpu fp32 op: conv depthwise;
 rewrite member variables of some ops

---
 .../lite/src/ops/deconvolution_depthwise.cc   |  4 +-
 .../src/runtime/kernel/arm/fp32/batchnorm.h   |  8 +--
 .../kernel/arm/fp32/convolution_depthwise.h   |  8 +--
 .../arm/fp32/convolution_depthwise_3x3.h      | 10 +--
 .../runtime/kernel/arm/fp32/fused_batchnorm.h | 12 ++--
 .../lite/src/runtime/kernel/arm/fp32/scale.cc | 18 +++---
 .../lite/src/runtime/kernel/arm/fp32/scale.h  |  9 ++-
 .../kernel/arm/fp32/batchnorm_fp32_tests.cc   | 63 +++++++++++++++++++
 8 files changed, 98 insertions(+), 34 deletions(-)

diff --git a/mindspore/lite/src/ops/deconvolution_depthwise.cc b/mindspore/lite/src/ops/deconvolution_depthwise.cc
index fe99cb4afd..4251ad6aad 100644
--- a/mindspore/lite/src/ops/deconvolution_depthwise.cc
+++ b/mindspore/lite/src/ops/deconvolution_depthwise.cc
@@ -48,8 +48,8 @@ int DeconvDepthwiseConv2D::InferShape(std::vector<tensor::Tensor *> inputs_, std
   pad_u_ = conv_prim->padUp();
   pad_d_ = conv_prim->padDown();
   pad_r_ = conv_prim->padRight();
-  output_h = conv_prim->strideH() * (input_h - 1) * conv_prim->kernelH() - pad_u_ - pad_d_;
-  output_w = conv_prim->strideW() * (input_w - 1) * conv_prim->kernelW() - pad_l_ - pad_r_;
+  output_h = conv_prim->strideH() * (input_h - 1) + conv_prim->kernelH() - pad_u_ - pad_d_;
+  output_w = conv_prim->strideW() * (input_w - 1) + conv_prim->kernelW() - pad_l_ - pad_r_;
   if ((output_h + conv_prim->padUp() + conv_prim->padDown() - conv_prim->kernelH()) % conv_prim->strideH() != 0) {
     output_h += (output_h + conv_prim->padLeft() + conv_prim->padRight() - conv_prim->kernelH()) % conv_prim->strideH();
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h
index 28d9027cf8..3cc451ba32 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h
@@ -42,10 +42,10 @@ class BatchnormCPUKernel : public LiteKernel {
   int DoExecute(int tid);
 
  private:
-  float *in_addr_;
-  float *mean_addr_;
-  float *var_addr_;
-  float *out_addr_;
+  float *in_addr_ = nullptr;
+  float *mean_addr_ = nullptr;
+  float *var_addr_ = nullptr;
+  float *out_addr_ = nullptr;
   BatchNormParameter *batchnorm_param_;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h
index 22de529bca..91e82c61bf 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h
@@ -40,10 +40,10 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
   int Execute(int task_id);
 
  private:
-  SlidingWindowParam *sliding_;
-  float *packed_weight_;
-  float *packed_input_;
-  float *packed_output_;
+  SlidingWindowParam *sliding_ = nullptr;
+  float *packed_weight_ = nullptr;
+  float *packed_input_ = nullptr;
+  float *packed_output_ = nullptr;
   bool need_align_ = false;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h
index ee937456da..19737762bc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h
@@ -49,11 +49,11 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel {
   int Execute(int task_id);
 
  private:
-  float *packed_weight_;
-  float *packed_input_;
-  float *packed_output_;
-  float *block_buffer_;
-  float *trans_buffer_;
+  float *packed_weight_ = nullptr;
+  float *packed_input_ = nullptr;
+  float *packed_output_ = nullptr;
+  float *block_buffer_ = nullptr;
+  float *trans_buffer_ = nullptr;
   int trans_size_;
   bool need_align_ = false;
 };
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
index a8b371874b..e1c67e545f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
@@ -40,12 +40,12 @@ class FusedBatchnormCPUKernel : public LiteKernel {
   int Execute(int task_id);
 
  private:
-  float *in_addr_;
-  float *mean_addr_;
-  float *var_addr_;
-  float *scale_addr_;
-  float *offset_addr_;
-  float *out_addr_;
+  float *in_addr_ = nullptr;
+  float *mean_addr_ = nullptr;
+  float *var_addr_ = nullptr;
+  float *scale_addr_ = nullptr;
+  float *offset_addr_ = nullptr;
+  float *out_addr_ = nullptr;
 
   BatchNormParameter *batchnorm_param_;
 };
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
index 2e32a31399..b3d553b3f9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
@@ -28,9 +28,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Scale;
 
 namespace mindspore::kernel {
-ScaleCPUKernel::~ScaleCPUKernel() { FreeTmpBuffer(); }
-
-void ScaleCPUKernel::FreeTmpBuffer() {
+ScaleCPUKernel::~ScaleCPUKernel() {
   if (scale_param_->const_scale_) {
     if (scale_ != nullptr) {
       free(scale_);
@@ -46,7 +44,6 @@ void ScaleCPUKernel::FreeTmpBuffer() {
 }
 
 int ScaleCPUKernel::InitScaleOffset() {
-  FreeTmpBuffer();
   auto scale_tensor = in_tensors_.at(1);
   float *scale_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->Data());
   if (scale_ptr != nullptr) {
@@ -116,10 +113,7 @@ int ScaleCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
   }
-  return ReSize();
-}
 
-int ScaleCPUKernel::ReSize() {
   auto ret = InitParameter();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
@@ -134,6 +128,15 @@ int ScaleCPUKernel::ReSize() {
   return RET_OK;
 }
 
+int ScaleCPUKernel::ReSize() {
+  // Re-runs InitParameter(); any failure is logged and reported to the caller as RET_ERROR.
+  if (InitParameter() != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
 int ScaleCPUKernel::Scale(int task_id) {
   auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_);
   if (ret != RET_OK) {
@@ -173,7 +176,6 @@ int ScaleCPUKernel::Run() {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;
   }
-  FreeTmpBuffer();
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
index 4e40a448f7..2cfded9e08 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
@@ -41,11 +41,10 @@ class ScaleCPUKernel : public LiteKernel {
   int Scale(int task_id);
 
  private:
-  void FreeTmpBuffer();
-  float *input_ptr_;
-  float *scale_;
-  float *offset_;
-  float *output_ptr_;
+  float *input_ptr_ = nullptr;
+  float *scale_ = nullptr;
+  float *offset_ = nullptr;
+  float *output_ptr_ = nullptr;
   ScaleParameter *scale_param_;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
index dc36f5e933..007b5031b3 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc
@@ -84,4 +84,67 @@ TEST_F(TestBatchnormFp32, BNTest) {
   output0_tensor.SetData(nullptr);
   MS_LOG(INFO) << "TestBathNormFp32 accuracy passed";
 }
+
+// Runs the fp32 FusedBatchNorm CPU kernel on a {1, 2, 2, 3} input and compares against precomputed values.
+TEST_F(TestBatchnormFp32, FusedBNTest) {
+  std::vector<float> in_data = {-7.400094, 11.37495, 2.0271842,  5.5954003,  13.255154, 4.6289115,
+                                9.591311,  8.699771, -12.226144, -6.1819935, 6.957936,  -8.70818};
+  std::vector<float> scale = {13.323708, 14.0656395, 12.634319};
+  std::vector<float> offset = {27.888096, 24.533648, 15.335093};
+  std::vector<float> mean = {11.5127125, 0.47681615, 5.851508};
+  std::vector<float> var = {1.270583, 13.005714, 6.089223};
+  std::vector<lite::tensor::Tensor *> inputs_tensor;
+  std::vector<lite::tensor::Tensor *> outputs_tensor;
+
+  BatchNormParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_FusedBatchNorm;  // same type as the kernel key below
+  op_param.epsilon_ = 0.001f;
+
+  std::vector<int> shape = {1, 2, 2, 3};
+  lite::tensor::Tensor input[5];  // order: data, scale, offset, mean, variance
+  input[0].SetData(in_data.data());  // tensors borrow the vectors' storage; detached again before returning
+  input[1].SetData(scale.data());
+  input[2].SetData(offset.data());
+  input[3].SetData(mean.data());
+  input[4].SetData(var.data());
+
+  input[0].set_shape(shape);
+  for (int i = 1; i < 5; i++) {  // the four parameter tensors each hold 3 values
+    input[i].set_shape({3});
+  }
+  for (int i = 0; i < 5; i++) {
+    inputs_tensor.push_back(&input[i]);
+  }
+
+  std::vector<float> output(12);  // 1 * 2 * 2 * 3 elements, written by the kernel
+  std::vector<float> corr_out = {-195.5765, 67.03745, -4.243883,  -42.028015, 74.37044, 9.075897,
+                                 5.1857452, 56.60399, -77.215096, -181.18402, 49.81066, -59.204563};
+
+  lite::tensor::Tensor output0_tensor;
+  outputs_tensor.push_back(&output0_tensor);
+  output0_tensor.SetData(output.data());
+  output0_tensor.set_shape(shape);
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  lite::Context ctx;
+  ctx.thread_num_ = 1;
+  kernel::LiteKernel *kernel =
+    creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
+  ASSERT_NE(kernel, nullptr);
+  kernel->Run();  // NOTE(review): return code is not checked; only the output comparison below is
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
+    std::cout << output[i] << " ,";
+  }
+  std::cout << std::endl;
+  CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
+
+  for (int i = 0; i < 5; i++) {  // all five inputs wrap vector-owned memory, so detach each one
+    input[i].SetData(nullptr);
+  }
+  output0_tensor.SetData(nullptr);
+  MS_LOG(INFO) << "TestFusedBathNormFp32 accuracy passed";
+}
 }  // namespace mindspore