From 897a59e9c0438c53e56cdf6de078c270b9ad8afa Mon Sep 17 00:00:00 2001
From: fuzhiye <fuzhiye@huawei.com>
Date: Thu, 25 Feb 2021 16:58:09 +0800
Subject: [PATCH] fix bug of arithmetic fp32 op

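Arithmetic ops: move the CheckDataType() call from
ArithmeticCPUKernel::ReSize() to Run(), and add the same check to
ArithmeticFP16CPUKernel::Run().

fp16 conv/deconv ops: store the original data types of weight and bias
in ConvolutionBaseFP16CPUKernel. They are now passed in through the
kernel constructors instead of being read from the input tensors in
ConvolutionDelegateFP16CPUKernel::Init(). Depthwise fp16 convolution
kernels are now created through CpuConvFp16KernelSelect and read their
weight and bias from the origin_weight_/origin_bias_ pointers.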
---
 .../kernel/arm/fp16/arithmetic_fp16.cc        |  4 +
 .../kernel/arm/fp16/convolution_1x1_fp16.h    |  8 +-
 .../kernel/arm/fp16/convolution_base_fp16.cc  |  5 +-
 .../kernel/arm/fp16/convolution_base_fp16.h   |  9 +-
 .../arm/fp16/convolution_delegate_fp16.cc     | 83 ++++++++++---------
 .../arm/fp16/convolution_delegate_fp16.h      |  7 +-
 .../arm/fp16/convolution_depthwise_fp16.cc    |  6 +-
 .../arm/fp16/convolution_depthwise_fp16.h     | 10 ++-
 .../convolution_depthwise_slidewindow_fp16.cc |  9 +-
 .../convolution_depthwise_slidewindow_fp16.h  | 10 ++-
 .../kernel/arm/fp16/convolution_fp16.h        |  8 +-
 .../arm/fp16/convolution_winograd_fp16.h      |  9 +-
 .../arm/fp16/deconvolution_depthwise_fp16.h   |  5 +-
 .../kernel/arm/fp16/deconvolution_fp16.cc     | 20 +++--
 .../kernel/arm/fp16/deconvolution_fp16.h      |  5 +-
 .../arm/fp16/deconvolution_winograd_fp16.h    |  5 +-
 .../kernel/arm/fp32/arithmetic_fp32.cc        |  9 +-
 17 files changed, 117 insertions(+), 95 deletions(-)

diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
index e70d95b553..05e6fbad04 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
@@ -136,6 +136,10 @@ int ArithmeticFP16CPUKernel::Execute(const void *input0, const void *input1, voi
 }
 
 int ArithmeticFP16CPUKernel::Run() {
+  if (CheckDataType() != RET_OK) {
+    MS_LOG(ERROR) << "ArithmeticFP16CPUKernel check dataType failed.";
+    return RET_ERROR;
+  }
   if (!input0_broadcast_) {
     input0_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
index e08274273e..f3b2953c09 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
@@ -31,11 +31,9 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                               const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight,
                               void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx),
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
         origin_weight_(origin_weight),
-        origin_bias_(origin_bias),
-        origin_weight_data_type_(origin_weight_data_type),
-        origin_bias_data_type_(origin_bias_data_type) {}
+        origin_bias_(origin_bias) {}
   ~Convolution1x1FP16CPUKernel() override;
 
   int Init() override;
@@ -64,8 +62,6 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   float16_t *pack_input_ = nullptr;
   float16_t *output_ptr_ = nullptr;
   MatMulParameter *matmul_param_ = nullptr;
-  TypeId origin_weight_data_type_;
-  TypeId origin_bias_data_type_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
index d785529560..b1402a90ed 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.cc
@@ -39,9 +39,8 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
 }
 
 int ConvolutionBaseFP16CPUKernel::GetExecuteFilter(lite::Tensor *weight_tensor, void *origin_data) {
-  auto weight_data_type = weight_tensor->data_type();
-  MS_ASSERT(weight_data_type == kNumberTypeFloat32 || weight_data_type == kNumberTypeFloat16);
-  if (weight_data_type == kNumberTypeFloat32) {
+  MS_ASSERT(origin_weight_data_type_ == kNumberTypeFloat32 || origin_weight_data_type_ == kNumberTypeFloat16);
+  if (origin_weight_data_type_ == kNumberTypeFloat32) {
     float *origin_weight = reinterpret_cast<float *>(origin_data);
     size_t fp16_weight_size = weight_tensor->Channel() * weight_tensor->Batch() * weight_tensor->Height() *
                               weight_tensor->Width() * sizeof(float16_t);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h
index 5a296e3bf0..4dc7ddd755 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_base_fp16.h
@@ -27,8 +27,11 @@ namespace mindspore::kernel {
 class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionBaseFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                               const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+                               const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
+                               TypeId origin_weight_data_type, TypeId origin_bias_data_type)
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
+        origin_weight_data_type_(origin_weight_data_type),
+        origin_bias_data_type_(origin_bias_data_type) {}
   ~ConvolutionBaseFP16CPUKernel() override;
 
   int Init() override { return mindspore::lite::RET_OK; }
@@ -46,6 +49,8 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
   float16_t *execute_input_ = nullptr;
   float16_t *execute_weight_ = nullptr;
   float16_t *execute_output_ = nullptr;
+  TypeId origin_weight_data_type_;
+  TypeId origin_bias_data_type_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc
index 349c29a2cc..2f0c2c69b5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc
@@ -69,17 +69,13 @@ int ConvolutionDelegateFP16CPUKernel::Init() {
     if (in_tensors_.size() == 3) {
       origin_bias_ = CopyData(in_tensors_.at(kBiasIndex));
       need_free_ = need_free_ | BIAS_NEED_FREE;
-      origin_bias_data_type_ = in_tensors_.at(kBiasIndex)->data_type();
     }
-    origin_weight_data_type_ = in_tensors_.at(kWeightIndex)->data_type();
     return RET_OK;
   }
   origin_weight_ = in_tensors_.at(kWeightIndex)->data_c();
   if (in_tensors_.size() == 3) {
     origin_bias_ = in_tensors_.at(kBiasIndex)->data_c();
-    origin_bias_data_type_ = in_tensors_.at(kBiasIndex)->data_type();
   }
-  origin_weight_data_type_ = in_tensors_.at(kWeightIndex)->data_type();
   return ReSize();
 }
 
@@ -110,6 +106,28 @@ ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) {
   return conv_parameter;
 }
 
+kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
+                                               const InnerContext *ctx, void *origin_weight, void *origin_bias,
+                                               TypeId origin_weight_data_type, TypeId origin_bias_data_type) {
+  MS_ASSERT(opParameter != nullptr);
+  auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
+  kernel::LiteKernel *kernel;
+  if (conv_param->input_channel_ < 32) {
+    kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(
+      opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
+  } else {
+    kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel(
+      opParameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
+  }
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "kernel is nullptr.";
+    free(opParameter);
+    return nullptr;
+  }
+  return kernel;
+}
+
 kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &inputs,
                                             const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
                                             const lite::InnerContext *ctx, void *origin_weight, void *origin_bias,
@@ -119,12 +137,17 @@ kernel::LiteKernel *CpuConvFp16KernelSelect(const std::vector<lite::Tensor *> &i
   int out_unit;
   CheckIfUseWinogradFp16(&use_winograd, &out_unit, conv_param);
   kernel::LiteKernel *kernel = nullptr;
-  if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
+
+  if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
+    kernel = CpuConvDwFp16KernelCreator(inputs, outputs, op_parameter, ctx, origin_weight, origin_bias,
+                                        origin_weight_data_type, origin_bias_data_type);
+  } else if (conv_param->kernel_h_ == 1 && conv_param->kernel_w_ == 1) {
     kernel = new (std::nothrow) kernel::Convolution1x1FP16CPUKernel(
       op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
   } else if (use_winograd) {
-    kernel = new (std::nothrow) kernel::ConvolutionWinogradFP16CPUKernel(
-      op_parameter, inputs, outputs, ctx, out_unit, origin_weight, origin_bias, origin_bias_data_type);
+    kernel = new (std::nothrow)
+      kernel::ConvolutionWinogradFP16CPUKernel(op_parameter, inputs, outputs, ctx, out_unit, origin_weight, origin_bias,
+                                               origin_weight_data_type, origin_bias_data_type);
   } else {
     kernel = new (std::nothrow) kernel::ConvolutionFP16CPUKernel(
       op_parameter, inputs, outputs, ctx, origin_weight, origin_bias, origin_weight_data_type, origin_bias_data_type);
@@ -211,7 +234,13 @@ static lite::Tensor *CreateOutputTensorFp16(const std::vector<int> &out_shape,
 kernel::LiteKernel *CreateDelegateConvFp16(const std::vector<lite::Tensor *> &inputs,
                                            const std::vector<lite::Tensor *> &outputs, OpParameter *op_parameter,
                                            const InnerContext *ctx) {
-  return new (std::nothrow) kernel::ConvolutionDelegateFP16CPUKernel(op_parameter, inputs, outputs, ctx);
+  auto weight_data_type = inputs.at(1)->data_type();
+  TypeId bias_data_type = kTypeUnknown;
+  if (inputs.size() == 3) {
+    bias_data_type = inputs.at(2)->data_type();
+  }
+  return new (std::nothrow)
+    kernel::ConvolutionDelegateFP16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
 }
 
 kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
@@ -302,33 +331,6 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor
     GroupConvolutionFP16CPUKernel(op_parameter, inputs, outputs, ctx, group_convs, conv_param->group_);
 }
 
-kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
-                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
-                                               const InnerContext *ctx, const kernel::KernelKey &desc) {
-  MS_ASSERT(opParameter != nullptr);
-
-  auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
-  kernel::LiteKernel *kernel;
-  if (conv_param->input_channel_ < 32) {
-    kernel = new (std::nothrow) kernel::ConvolutionDepthwiseSWFp16CPUKernel(opParameter, inputs, outputs, ctx);
-  } else {
-    kernel = new (std::nothrow) kernel::ConvolutionDepthwiseFp16CPUKernel(opParameter, inputs, outputs, ctx);
-  }
-  if (kernel == nullptr) {
-    MS_LOG(ERROR) << "kernel is nullptr.";
-    free(opParameter);
-    return nullptr;
-  }
-  auto ret = kernel->Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
-                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
-    delete kernel;
-    return nullptr;
-  }
-  return kernel;
-}
-
 kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                              const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                              const InnerContext *ctx, const kernel::KernelKey &desc) {
@@ -337,12 +339,13 @@ kernel::LiteKernel *CpuConvFp16KernelCreator(const std::vector<lite::Tensor *> &
 
   auto conv_param = reinterpret_cast<ConvParameter *>(opParameter);
   kernel::LiteKernel *kernel = nullptr;
-  if (conv_param->group_ == 1) {
-    kernel = CreateDelegateConvFp16(inputs, outputs, opParameter, ctx);
-  } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = CpuConvDwFp16KernelCreator(inputs, outputs, opParameter, ctx, desc);
-  } else {
+  bool is_depthwise =
+    (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_);
+
+  if (conv_param->group_ > 1 && !is_depthwise) {
     kernel = CpuGroupConvFp16KernelCreator(inputs, outputs, opParameter, ctx);
+  } else {
+    kernel = CreateDelegateConvFp16(inputs, outputs, opParameter, ctx);
   }
 
   if (kernel == nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
index a4e66c3ed7..bc686cc76e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
@@ -29,8 +29,11 @@ namespace mindspore::kernel {
 class ConvolutionDelegateFP16CPUKernel : public LiteKernel {
  public:
   ConvolutionDelegateFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : LiteKernel(parameter, inputs, outputs, ctx) {}
+                                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+                                   TypeId origin_weight_data_type, TypeId origin_bias_data_type)
+      : LiteKernel(parameter, inputs, outputs, ctx),
+        origin_weight_data_type_(origin_weight_data_type),
+        origin_bias_data_type_(origin_bias_data_type) {}
   ~ConvolutionDelegateFP16CPUKernel() override {
     FreeCopiedData();
     if (fp16_conv_kernel_ != nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
index 17e3ec4626..3121e93809 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
@@ -42,7 +42,7 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-  auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, weight_tensor->data_c());
+  auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteFilter(weight_tensor, origin_weight_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "get execute filter data failed.";
     return ret;
@@ -63,8 +63,8 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
   auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_);
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
-    auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
-    MS_ASSERT(ori_bias);
+    MS_ASSERT(origin_bias_);
+    auto ori_bias = reinterpret_cast<float *>(origin_bias_);
     for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
       bias_fp16[i] = (float16_t)ori_bias[i];
     }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
index f59863ceb5..0f08247b83 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
@@ -35,8 +35,12 @@ namespace mindspore::kernel {
 class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
  public:
   ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                                    const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {}
+                                    const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
+                                    void *origin_weight, void *origin_bias, TypeId origin_weight_data_type,
+                                    TypeId origin_bias_data_type)
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
+        origin_weight_(origin_weight),
+        origin_bias_(origin_bias) {}
   ~ConvolutionDepthwiseFp16CPUKernel() override;
 
   int Init() override;
@@ -47,6 +51,8 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   int Execute(int task_id);
 
  private:
+  void *origin_weight_;  // do not free
+  void *origin_bias_;    // do not free
   float16_t *packed_weight_ = nullptr;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
index adb47b9d86..9bc4503cee 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
@@ -61,7 +61,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
   // init weight: o, h, w, i; o == group, i == 1
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->MutableData());
   int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
 
   packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
@@ -69,8 +68,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-  PackNCHWFp32ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
-                           weight_tensor->Batch());
+  PackNCHWFp32ToNC8HW8Fp16(reinterpret_cast<float *>(origin_weight_), packed_weight_, 1,
+                           weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
 
   bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t)));
   if (bias_data_ == nullptr) {
@@ -81,8 +80,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
   auto bias_fp16 = reinterpret_cast<float16_t *>(bias_data_);
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
-    auto ori_bias = reinterpret_cast<float *>(bias_tensor->MutableData());
-    MS_ASSERT(ori_bias);
+    MS_ASSERT(origin_bias_);
+    auto ori_bias = reinterpret_cast<float *>(origin_bias_);
     for (int i = 0; i < bias_tensor->ElementsNum(); i++) {
       bias_fp16[i] = (float16_t)ori_bias[i];
     }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
index 6bf78f8859..4dadf8ff28 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
@@ -36,8 +36,12 @@ namespace mindspore::kernel {
 class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
  public:
   ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                                      const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {}
+                                      const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
+                                      void *origin_weight, void *origin_bias, TypeId origin_weight_data_type,
+                                      TypeId origin_bias_data_type)
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
+        origin_weight_(origin_weight),
+        origin_bias_(origin_bias) {}
   ~ConvolutionDepthwiseSWFp16CPUKernel() override;
 
   int Init() override;
@@ -50,6 +54,8 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
 
  private:
   void FreePackedInputOutput();
+  void *origin_weight_;  // do not free
+  void *origin_bias_;    // do not free
   SlidingWindowParam *sliding_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
index bdc5f074d8..7424ed80e1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
@@ -28,11 +28,9 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight,
                            void *origin_bias, TypeId origin_weight_data_type, TypeId origin_bias_data_type)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx),
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
         origin_weight_(origin_weight),
-        origin_bias_(origin_bias),
-        origin_weight_data_type_(origin_weight_data_type),
-        origin_bias_data_type_(origin_bias_data_type) {}
+        origin_bias_(origin_bias) {}
   ~ConvolutionFP16CPUKernel() override {
     if (packed_weight_ != nullptr) {
       free(packed_weight_);
@@ -64,8 +62,6 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   float16_t *packed_input_ = nullptr;
   float16_t *packed_weight_ = nullptr;
   float16_t *col_major_input_ = nullptr;
-  TypeId origin_weight_data_type_;
-  TypeId origin_bias_data_type_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
index 0bb273a582..627fe5092e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
@@ -31,12 +31,12 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
  public:
   ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, int out_unit,
-                                   void *origin_weight, void *origin_bias, TypeId origin_bias_data_type)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx),
+                                   void *origin_weight, void *origin_bias, TypeId origin_weight_data_type,
+                                   TypeId origin_bias_data_type)
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type),
         output_unit_(out_unit),
         origin_weight_(origin_weight),
-        origin_bias_(origin_bias),
-        origin_bias_data_type_(origin_bias_data_type) {}
+        origin_bias_(origin_bias) {}
   ~ConvolutionWinogradFP16CPUKernel() override {
     if (trans_weight_ != nullptr) {
       free(trans_weight_);
@@ -86,7 +86,6 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   TmpBufferAddressFp16 tmp_buffer_address_list_[4];
   InputTransFp16Func in_func_;
   OutputTransFp16Func out_func_;
-  TypeId origin_bias_data_type_;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
index e49618c6e1..0d6dfdd87b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
@@ -37,8 +37,9 @@ namespace mindspore::kernel {
 class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
  public:
   DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                                      const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {}
+                                      const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
+                                      TypeId origin_weight_data_type, TypeId origin_bias_data_type)
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {}
   ~DeconvolutionDepthwiseFp16CPUKernel() override;
 
   int Init() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
index 5363050c6e..6f3106ec4c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
@@ -216,21 +216,25 @@ kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *>
   MS_ASSERT(op_parameter != nullptr);
   MS_ASSERT(desc.type == schema::PrimitiveType_Conv2dTransposeFusion);
 
+  auto weight_data_type = inputs.at(1)->data_type();
+  TypeId bias_data_type = kTypeUnknown;
+  if (inputs.size() == 3) {
+    bias_data_type = inputs.at(2)->data_type();
+  }
   kernel::LiteKernel *kernel = nullptr;
   auto conv_param = reinterpret_cast<ConvParameter *>(op_parameter);
-
   if (conv_param->group_ == 1) {
     if ((conv_param->stride_h_ != 1 || conv_param->stride_w_ != 1) &&
-        (conv_param->dilation_w_ == 1 && conv_param->dilation_h_ == 1)) {
-      kernel = new (std::nothrow) kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+        (conv_param->dilation_h_ == 1 && conv_param->dilation_w_ == 1)) {
+      kernel = new (std::nothrow)
+        kernel::DeConvWinogradFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
     } else {
-      kernel = new (std::nothrow) kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx);
+      kernel = new (std::nothrow)
+        kernel::DeConvolutionFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
     }
   } else if (conv_param->group_ == conv_param->input_channel_ && conv_param->group_ == conv_param->output_channel_) {
-    kernel = new (std::nothrow) DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx);
-  } else {
-    MS_LOG(ERROR) << "deconv do not support group deconv!";
-    kernel = nullptr;
+    kernel = new (std::nothrow)
+      DeconvolutionDepthwiseFp16CPUKernel(op_parameter, inputs, outputs, ctx, weight_data_type, bias_data_type);
   }
 
   if (kernel == nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h
index 54e9ddcdef..d1ecc46057 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h
@@ -27,8 +27,9 @@ namespace mindspore::kernel {
 class DeConvolutionFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
  public:
   DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {}
+                             const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+                             TypeId origin_weight_data_type, TypeId origin_bias_data_type)
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {}
   ~DeConvolutionFp16CPUKernel() override;
   int Init() override;
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h
index 1b4220415c..e099e91fc0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h
@@ -28,8 +28,9 @@ namespace mindspore::kernel {
 class DeConvWinogradFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
  public:
   DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx) {}
+                              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+                              TypeId origin_weight_data_type, TypeId origin_bias_data_type)
+      : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, origin_weight_data_type, origin_bias_data_type) {}
   ~DeConvWinogradFp16CPUKernel() override;
   int Init() override;
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
index 39c4a4aabc..bbdbd20fb5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
@@ -49,11 +49,6 @@ int ArithmeticCPUKernel::Init() {
 }
 
 int ArithmeticCPUKernel::ReSize() {
-  if (CheckDataType() != RET_OK) {
-    MS_LOG(ERROR) << "ArithmeticCPUKernel resize failed.";
-    return RET_ERROR;
-  }
-
   CalcMultiplesAndStrides(param_);
   if (param_->broadcasting_) {
     outside_ = 1;
@@ -359,6 +354,10 @@ int ArithmeticsRun(void *cdata, int task_id) {
 }
 
 int ArithmeticCPUKernel::Run() {
+  if (CheckDataType() != RET_OK) {
+    MS_LOG(ERROR) << "ArithmeticCPUKernel check dataType failed.";
+    return RET_ERROR;
+  }
   if (!input0_broadcast_) {
     input0_ptr_ = in_tensors_[0]->data_c();
   }