From f0eef679235b681aec66478f6bb1c6d554276859 Mon Sep 17 00:00:00 2001 From: yangruoqi713 Date: Thu, 13 Aug 2020 20:17:13 +0800 Subject: [PATCH] [MS][LITE] fix bug of arm cpu fp32 op: conv depthwise; rewrite member variables of some ops --- .../lite/src/ops/deconvolution_depthwise.cc | 4 +- .../src/runtime/kernel/arm/fp32/batchnorm.h | 8 +-- .../kernel/arm/fp32/convolution_depthwise.h | 8 +-- .../arm/fp32/convolution_depthwise_3x3.h | 10 +-- .../runtime/kernel/arm/fp32/fused_batchnorm.h | 12 ++-- .../lite/src/runtime/kernel/arm/fp32/scale.cc | 18 +++--- .../lite/src/runtime/kernel/arm/fp32/scale.h | 9 ++- .../kernel/arm/fp32/batchnorm_fp32_tests.cc | 63 +++++++++++++++++++ 8 files changed, 98 insertions(+), 34 deletions(-) diff --git a/mindspore/lite/src/ops/deconvolution_depthwise.cc b/mindspore/lite/src/ops/deconvolution_depthwise.cc index fe99cb4afd..4251ad6aad 100644 --- a/mindspore/lite/src/ops/deconvolution_depthwise.cc +++ b/mindspore/lite/src/ops/deconvolution_depthwise.cc @@ -48,8 +48,8 @@ int DeconvDepthwiseConv2D::InferShape(std::vector inputs_, std pad_u_ = conv_prim->padUp(); pad_d_ = conv_prim->padDown(); pad_r_ = conv_prim->padRight(); - output_h = conv_prim->strideH() * (input_h - 1) * conv_prim->kernelH() - pad_u_ - pad_d_; - output_w = conv_prim->strideW() * (input_w - 1) * conv_prim->kernelW() - pad_l_ - pad_r_; + output_h = conv_prim->strideH() * (input_h - 1) + conv_prim->kernelH() - pad_u_ - pad_d_; + output_w = conv_prim->strideW() * (input_w - 1) + conv_prim->kernelW() - pad_l_ - pad_r_; if ((output_h + conv_prim->padUp() + conv_prim->padDown() - conv_prim->kernelH()) % conv_prim->strideH() != 0) { output_h += (output_h + conv_prim->padLeft() + conv_prim->padRight() - conv_prim->kernelH()) % conv_prim->strideH(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h index 28d9027cf8..3cc451ba32 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h @@ -42,10 +42,10 @@ class BatchnormCPUKernel : public LiteKernel { int DoExecute(int tid); private: - float *in_addr_; - float *mean_addr_; - float *var_addr_; - float *out_addr_; + float *in_addr_ = nullptr; + float *mean_addr_ = nullptr; + float *var_addr_ = nullptr; + float *out_addr_ = nullptr; BatchNormParameter *batchnorm_param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h index 22de529bca..91e82c61bf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h @@ -40,10 +40,10 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { int Execute(int task_id); private: - SlidingWindowParam *sliding_; - float *packed_weight_; - float *packed_input_; - float *packed_output_; + SlidingWindowParam *sliding_ = nullptr; + float *packed_weight_ = nullptr; + float *packed_input_ = nullptr; + float *packed_output_ = nullptr; bool need_align_ = false; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h index ee937456da..19737762bc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h @@ -49,11 +49,11 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { int Execute(int task_id); private: - float *packed_weight_; - float *packed_input_; - float *packed_output_; - float *block_buffer_; - float *trans_buffer_; + float *packed_weight_ = nullptr; + float *packed_input_ = nullptr; + float *packed_output_ = nullptr; + float *block_buffer_ = nullptr; + float *trans_buffer_ = nullptr; int trans_size_; bool need_align_ = false; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h index a8b371874b..e1c67e545f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h @@ -40,12 +40,12 @@ class FusedBatchnormCPUKernel : public LiteKernel { int Execute(int task_id); private: - float *in_addr_; - float *mean_addr_; - float *var_addr_; - float *scale_addr_; - float *offset_addr_; - float *out_addr_; + float *in_addr_ = nullptr; + float *mean_addr_ = nullptr; + float *var_addr_ = nullptr; + float *scale_addr_ = nullptr; + float *offset_addr_ = nullptr; + float *out_addr_ = nullptr; BatchNormParameter *batchnorm_param_; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc index 2e32a31399..b3d553b3f9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc @@ -28,9 +28,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Scale; namespace mindspore::kernel { -ScaleCPUKernel::~ScaleCPUKernel() { FreeTmpBuffer(); } - -void ScaleCPUKernel::FreeTmpBuffer() { +ScaleCPUKernel::~ScaleCPUKernel() { if (scale_param_->const_scale_) { if (scale_ != nullptr) { free(scale_); @@ -46,7 +44,6 @@ void ScaleCPUKernel::FreeTmpBuffer() { } int ScaleCPUKernel::InitScaleOffset() { - FreeTmpBuffer(); auto scale_tensor = in_tensors_.at(1); float *scale_ptr = reinterpret_cast(in_tensors_.at(1)->Data()); if (scale_ptr != nullptr) { @@ -116,10 +113,7 @@ int ScaleCPUKernel::Init() { if (!InferShapeDone()) { return RET_OK; } - return ReSize(); -} -int ScaleCPUKernel::ReSize() { auto ret = InitParameter(); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale fp32 InitParameter failed."; @@ -134,6 +128,15 @@ int ScaleCPUKernel::ReSize() { return RET_OK; } +int ScaleCPUKernel::ReSize() { + auto ret = InitParameter(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Scale fp32 InitParameter failed."; + return RET_ERROR; + } + return RET_OK; +} + int ScaleCPUKernel::Scale(int task_id) { auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_); if (ret != RET_OK) { @@ -173,7 +176,6 @@ int ScaleCPUKernel::Run() { MS_LOG(ERROR) << "Scale error error_code[" << ret << "]"; return RET_ERROR; } - FreeTmpBuffer(); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h index 4e40a448f7..2cfded9e08 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h @@ -41,11 +41,10 @@ class ScaleCPUKernel : public LiteKernel { int Scale(int task_id); private: - void FreeTmpBuffer(); - float *input_ptr_; - float *scale_; - float *offset_; - float *output_ptr_; + float *input_ptr_ = nullptr; + float *scale_ = nullptr; + float *offset_ = nullptr; + float *output_ptr_ = nullptr; ScaleParameter *scale_param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc index dc36f5e933..007b5031b3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc @@ -84,4 +84,67 @@ TEST_F(TestBatchnormFp32, BNTest) { output0_tensor.SetData(nullptr); MS_LOG(INFO) << "TestBathNormFp32 accuracy passed"; } + +TEST_F(TestBatchnormFp32, FusedBNTest) { + std::vector in_data = {-7.400094, 11.37495, 2.0271842, 5.5954003, 13.255154, 4.6289115, + 9.591311, 8.699771, -12.226144, -6.1819935, 6.957936, -8.70818}; + std::vector scale = {13.323708, 14.0656395, 12.634319}; + std::vector offset = {27.888096, 24.533648, 15.335093}; + std::vector mean = {11.5127125, 0.47681615, 5.851508}; + std::vector var = {1.270583, 13.005714, 6.089223}; + std::vector inputs_tensor; + std::vector outputs_tensor; + + BatchNormParameter op_param; + op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm; + op_param.epsilon_ = 0.001f; + + std::vector shape = {1, 2, 2, 3}; + lite::tensor::Tensor input[5]; + input[0].SetData(in_data.data()); + input[1].SetData(scale.data()); + input[2].SetData(offset.data()); + input[3].SetData(mean.data()); + input[4].SetData(var.data()); + + input[0].set_shape(shape); + for (int i = 1; i < 5; i++) { + input[i].set_shape({3}); + } + for (int i = 0; i < 5; i++) { + inputs_tensor.push_back(&input[i]); + } + + std::vector output(12); + std::vector corr_out = {-195.5765, 67.03745, -4.243883, -42.028015, 74.37044, 9.075897, + 5.1857452, 56.60399, -77.215096, -181.18402, 49.81066, -59.204563}; + + lite::tensor::Tensor output0_tensor; + outputs_tensor.push_back(&output0_tensor); + output0_tensor.SetData(output.data()); + output0_tensor.set_shape(shape); + kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); + lite::Context ctx; + ctx.thread_num_ = 1; + kernel::LiteKernel *kernel = + creator(inputs_tensor, outputs_tensor, reinterpret_cast(&op_param), &ctx, desc, nullptr); + ASSERT_NE(kernel, nullptr); + auto output_tensor_shape = output0_tensor.shape(); + kernel->Run(); + + printf("==================output data=================\n"); + for (int i = 0; i < output0_tensor.ElementsNum(); i++) { + std::cout << output[i] << " ,"; + } + std::cout << std::endl; + CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001); + + for (int i = 1; i < 5; i++) { + input[i].SetData(nullptr); + } + output0_tensor.SetData(nullptr); + MS_LOG(INFO) << "TestFusedBathNormFp32 accuracy passed"; +} } // namespace mindspore