!7654 add detection_post_process_int8 kernel & add layernorm primitive registration

Merge pull request !7654 from wangzhe/master
pull/7654/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 7f209bdf12

@ -182,13 +182,13 @@ int DetectionPostProcess::InferShape(std::vector<lite::Tensor *> inputs_, std::v
MS_ASSERT(num_det != nullptr);
detected_boxes->SetFormat(boxes->GetFormat());
detected_boxes->set_data_type(boxes->data_type());
detected_boxes->set_data_type(kNumberTypeFloat32);
detected_classes->SetFormat(boxes->GetFormat());
detected_classes->set_data_type(boxes->data_type());
detected_classes->set_data_type(kNumberTypeFloat32);
detected_scores->SetFormat(boxes->GetFormat());
detected_scores->set_data_type(boxes->data_type());
detected_scores->set_data_type(kNumberTypeFloat32);
num_det->SetFormat(boxes->GetFormat());
num_det->set_data_type(boxes->data_type());
num_det->set_data_type(kNumberTypeFloat32);
if (!GetInferFlag()) {
return RET_OK;
}

@ -15,6 +15,10 @@
*/
#include "src/ops/layer_norm.h"
#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"
#endif
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
@ -59,6 +63,10 @@ std::vector<int> LayerNorm::GetNormalizedShape() const {
}
// Reads epsilon straight from the flatbuffer-backed LayerNorm attribute table.
float LayerNorm::GetEpsilon() const { return this->primitive_->value_as_LayerNorm()->epsilon(); }
// Reads the elementwiseAffine flag from the same flatbuffer attribute table.
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value_as_LayerNorm()->elementwiseAffine(); }
// Factory: builds a LayerNorm PrimitiveC wrapper from a raw schema::Primitive.
// NOTE(review): assumes value_as_LayerNorm() is non-null for a well-formed
// model; no null check is performed here — confirm upstream validation.
PrimitiveC *LayerNormCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<LayerNorm>(primitive);
}
// File-scope registration object: binds PrimitiveType_LayerNorm to the
// creator above at static-initialization time.
Registry LayerNormRegistry(schema::PrimitiveType_LayerNorm, LayerNormCreator);
#endif
int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {

@ -0,0 +1,137 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/base/detection_post_process_base.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/int8/quant_dtype_cast.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DetectionPostProcess;
namespace mindspore::kernel {
int DetectionPostProcessBaseCPUKernel::Init() {
auto anchor_tensor = in_tensors_.at(2);
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->anchors_ = nullptr;
if (anchor_tensor->data_type() == kNumberTypeUInt8) {
const auto quant_params = anchor_tensor->GetQuantParams();
const double scale = quant_params.at(0).scale;
const int32_t zp = quant_params.at(0).zeroPoint;
auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->MutableData());
auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (anchor_fp32 == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
for (int i = 0; i < anchor_tensor->ElementsNum(); ++i) {
*(anchor_fp32 + i) = static_cast<float>((static_cast<int>(anchor_uint8[i]) - zp) * scale);
}
parameter->anchors_ = anchor_fp32;
} else if (anchor_tensor->data_type() == kNumberTypeFloat32) {
parameter->anchors_ = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (parameter->anchors_ == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
memcpy(parameter->anchors_, anchor_tensor->MutableData(), anchor_tensor->Size());
} else {
MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type();
return RET_ERROR;
}
return RET_OK;
}
// Destructor: releases the fp32 anchor buffer allocated in Init().
// delete[] on nullptr is a no-op, so an un-initialized kernel is safe.
DetectionPostProcessBaseCPUKernel::~DetectionPostProcessBaseCPUKernel() {
  auto *param = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
  delete[] param->anchors_;
}
// ReSize: no shape-dependent state is cached (scratch buffers are sized per
// Run() call), so resizing is a no-op.
int DetectionPostProcessBaseCPUKernel::ReSize() { return RET_OK; }
// GetInputData: fp32 path — validates both inputs are float tensors and
// borrows their backing buffers (no copy, no ownership transfer).
int DetectionPostProcessBaseCPUKernel::GetInputData() {
  const auto boxes_type = in_tensors_.at(0)->data_type();
  const auto scores_type = in_tensors_.at(1)->data_type();
  const bool boxes_ok = boxes_type == kNumberTypeFloat32 || boxes_type == kNumberTypeFloat;
  const bool scores_ok = scores_type == kNumberTypeFloat32 || scores_type == kNumberTypeFloat;
  if (!boxes_ok || !scores_ok) {
    MS_LOG(ERROR) << "Input data type error";
    return RET_ERROR;
  }
  input_boxes = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
  input_scores = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
  return RET_OK;
}
int DetectionPostProcessBaseCPUKernel::Run() {
MS_ASSERT(context_->allocator != nullptr);
int status = GetInputData();
if (status != RET_OK) {
return status;
}
auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->MutableData());
auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->MutableData());
const int num_boxes = in_tensors_.at(0)->shape()[1];
const int num_classes_with_bg = in_tensors_.at(1)->shape()[2];
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->decoded_boxes_ = context_->allocator->Malloc(num_boxes * 4 * sizeof(float));
parameter->nms_candidate_ = context_->allocator->Malloc(num_boxes * sizeof(uint8_t));
parameter->selected_ = context_->allocator->Malloc(num_boxes * sizeof(int));
parameter->score_with_class_ = context_->allocator->Malloc(num_boxes * sizeof(ScoreWithIndex));
if (!parameter->decoded_boxes_ || !parameter->nms_candidate_ || !parameter->selected_ ||
!parameter->score_with_class_) {
MS_LOG(ERROR) << "malloc parameter->decoded_boxes_ || parameter->nms_candidate_ || parameter->selected_ || "
"parameter->score_with_class_ failed.";
return RET_ERROR;
}
if (parameter->use_regular_nms_) {
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex));
parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int));
if (!parameter->score_with_class_all_ || !parameter->indexes_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ || parameter->indexes_ failed.";
return RET_ERROR;
}
} else {
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes * parameter->num_classes_) * sizeof(ScoreWithIndex));
if (!parameter->score_with_class_all_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ failed.";
return RET_ERROR;
}
}
DetectionPostProcess(num_boxes, num_classes_with_bg, input_boxes, input_scores, parameter->anchors_, output_boxes,
output_classes, output_scores, output_num, parameter);
context_->allocator->Free(parameter->decoded_boxes_);
parameter->decoded_boxes_ = nullptr;
context_->allocator->Free(parameter->nms_candidate_);
parameter->nms_candidate_ = nullptr;
context_->allocator->Free(parameter->selected_);
parameter->selected_ = nullptr;
context_->allocator->Free(parameter->score_with_class_);
parameter->score_with_class_ = nullptr;
context_->allocator->Free(parameter->score_with_class_all_);
parameter->score_with_class_all_ = nullptr;
if (parameter->use_regular_nms_) {
context_->allocator->Free(parameter->indexes_);
parameter->indexes_ = nullptr;
}
return RET_OK;
}
} // namespace mindspore::kernel

@ -0,0 +1,47 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/fp32/detection_post_process.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
// Shared base for the fp32 and int8 DetectionPostProcess CPU kernels.
// Subclasses customize only how the two inputs are turned into fp32 buffers
// (GetInputData); decoding/NMS in Run() is common.
class DetectionPostProcessBaseCPUKernel : public LiteKernel {
 public:
  DetectionPostProcessBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                    const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  virtual ~DetectionPostProcessBaseCPUKernel();

  int Init() override;
  int ReSize() override;
  int Run() override;

 protected:
  // fp32 views of the two inputs, filled by GetInputData() and consumed by
  // Run(). Initialized to nullptr so an un-run kernel never holds garbage.
  float *input_boxes = nullptr;
  float *input_scores = nullptr;
  // Fix: must be virtual — Run() calls GetInputData() and the int8 subclass
  // declares its own dequantizing version. A non-virtual declaration here
  // would be hidden, so the base fp32 type check would always reject the
  // int8 kernel's inputs.
  virtual int GetInputData();
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_

@ -18,105 +18,14 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/int8/quant_dtype_cast.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DetectionPostProcess;
namespace mindspore::kernel {
// Pre-refactor fp32 Init (removed by this commit in favor of the shared base
// kernel): converts the anchor tensor (input 2) into an owned fp32 buffer.
int DetectionPostProcessCPUKernel::Init() {
auto anchor_tensor = in_tensors_.at(2);
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->anchors_ = nullptr;
if (anchor_tensor->data_type() == kNumberTypeUInt8) {
// NOTE(review): quant_params.at(0) is read without checking the vector is
// non-empty — a uint8 anchor tensor without quant params would throw.
const auto quant_params = anchor_tensor->GetQuantParams();
const double scale = quant_params.at(0).scale;
const int32_t zp = quant_params.at(0).zeroPoint;
auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->MutableData());
auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (anchor_fp32 == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
// Per-tensor affine dequantization: fp32 = (u8 - zero_point) * scale.
for (int i = 0; i < anchor_tensor->ElementsNum(); ++i) {
*(anchor_fp32 + i) = static_cast<float>((static_cast<int>(anchor_uint8[i]) - zp) * scale);
}
parameter->anchors_ = anchor_fp32;
} else if (anchor_tensor->data_type() == kNumberTypeFloat32) {
parameter->anchors_ = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (parameter->anchors_ == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
// Size() is the tensor's byte size, so this copies every fp32 element.
memcpy(parameter->anchors_, anchor_tensor->MutableData(), anchor_tensor->Size());
} else {
MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type();
return RET_ERROR;
}
return RET_OK;
}
// Pre-refactor fp32 destructor (removed by this commit): frees the anchor
// buffer allocated in Init(); delete[] on nullptr is a no-op.
DetectionPostProcessCPUKernel::~DetectionPostProcessCPUKernel() {
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
delete[](parameter->anchors_);
}
// Pre-refactor fp32 ReSize (removed by this commit): no cached shape state.
int DetectionPostProcessCPUKernel::ReSize() { return RET_OK; }
// Pre-refactor fp32 Run (removed by this commit in favor of the shared base
// Run): decodes boxes against the anchors, applies NMS, writes the four
// fp32 outputs. Scratch buffers come from the context allocator.
int DetectionPostProcessCPUKernel::Run() {
auto input_boxes = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
auto input_scores = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
// output_classes and output_num use float type now
auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->MutableData());
auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->MutableData());
MS_ASSERT(context_->allocator != nullptr);
const int num_boxes = in_tensors_.at(0)->shape()[1];
const int num_classes_with_bg = in_tensors_.at(1)->shape()[2];
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->decoded_boxes_ = context_->allocator->Malloc(num_boxes * 4 * sizeof(float));
parameter->nms_candidate_ = context_->allocator->Malloc(num_boxes * sizeof(uint8_t));
parameter->selected_ = context_->allocator->Malloc(num_boxes * sizeof(int));
parameter->score_with_class_ = context_->allocator->Malloc(num_boxes * sizeof(ScoreWithIndex));
// NOTE(review): every error return below leaks the buffers already
// allocated above, and no freed pointer is nulled afterwards — the shared
// base-class Run() that replaces this code inherits the same pattern.
if (!parameter->decoded_boxes_ || !parameter->nms_candidate_ || !parameter->selected_ ||
!parameter->score_with_class_) {
MS_LOG(ERROR) << "malloc parameter->decoded_boxes_ || parameter->nms_candidate_ || parameter->selected_ || "
"parameter->score_with_class_ failed.";
return RET_ERROR;
}
if (parameter->use_regular_nms_) {
// Regular NMS needs room for the kept detections as well.
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex));
parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int));
if (!parameter->score_with_class_all_ || !parameter->indexes_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ || parameter->indexes_ failed.";
return RET_ERROR;
}
} else {
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes * parameter->num_classes_) * sizeof(ScoreWithIndex));
if (!parameter->score_with_class_all_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ failed.";
return RET_ERROR;
}
}
DetectionPostProcess(num_boxes, num_classes_with_bg, input_boxes, input_scores, parameter->anchors_, output_boxes,
output_classes, output_scores, output_num, parameter);
context_->allocator->Free(parameter->decoded_boxes_);
context_->allocator->Free(parameter->nms_candidate_);
context_->allocator->Free(parameter->selected_);
context_->allocator->Free(parameter->score_with_class_);
context_->allocator->Free(parameter->score_with_class_all_);
// indexes_ is only allocated on the regular-NMS branch, so it is only
// freed there.
if (parameter->use_regular_nms_) {
context_->allocator->Free(parameter->indexes_);
}
return RET_OK;
}
kernel::LiteKernel *CpuDetectionPostProcessFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,

@ -20,22 +20,19 @@
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/detection_post_process_base.h"
#include "nnacl/fp32/detection_post_process.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class DetectionPostProcessCPUKernel : public LiteKernel {
class DetectionPostProcessCPUKernel : public DetectionPostProcessBaseCPUKernel {
public:
DetectionPostProcessCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~DetectionPostProcessCPUKernel() override;
int Init() override;
int ReSize() override;
int Run() override;
: DetectionPostProcessBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~DetectionPostProcessCPUKernel() = default;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_H_

@ -0,0 +1,88 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/int8/detection_post_process_int8.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/int8/quant_dtype_cast.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DetectionPostProcess;
namespace mindspore::kernel {
// Dequantize: converts an int8 tensor into a freshly allocated fp32 buffer
// (*data) using its first quant param (per-tensor scale/zero-point).
// The caller owns the buffer and must return it to context_->allocator.
// Returns RET_ERROR on missing quant params or allocation failure.
int DetectionPostProcessInt8CPUKernel::Dequantize(lite::Tensor *tensor, float **data) {
  // Fix: validate quant params BEFORE allocating — the original Malloc'ed
  // *data first and leaked it on the "null quant param" error path.
  if (tensor->GetQuantParams().empty()) {
    MS_LOG(ERROR) << "null quant param";
    return RET_ERROR;
  }
  auto data_int8 = reinterpret_cast<int8_t *>(tensor->MutableData());
  *data = reinterpret_cast<float *>(context_->allocator->Malloc(tensor->ElementsNum() * sizeof(float)));
  if (*data == nullptr) {
    MS_LOG(ERROR) << "Malloc data failed.";
    return RET_ERROR;
  }
  auto quant_param = tensor->GetQuantParams().front();
  DoDequantizeInt8ToFp32(data_int8, *data, quant_param.scale, quant_param.zeroPoint, tensor->ElementsNum());
  return RET_OK;
}
int DetectionPostProcessInt8CPUKernel::GetInputData() {
if (in_tensors_.at(0)->data_type() != kNumberTypeInt8 || in_tensors_.at(1)->data_type() != kNumberTypeInt8) {
MS_LOG(ERROR) << "Input data type error";
return RET_ERROR;
}
int status = Dequantize(in_tensors_.at(0), &input_boxes);
if (status != RET_OK) {
return status;
}
status = Dequantize(in_tensors_.at(1), &input_scores);
if (status != RET_OK) {
return status;
}
return RET_OK;
}
// Registry factory for the int8 DetectionPostProcess kernel: validates the
// parameter, constructs and Init()s the kernel, and returns it ready to run.
// Returns nullptr (after cleanup) on any failure.
kernel::LiteKernel *CpuDetectionPostProcessInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                             const std::vector<lite::Tensor *> &outputs,
                                                             OpParameter *opParameter, const lite::InnerContext *ctx,
                                                             const kernel::KernelKey &desc,
                                                             const mindspore::lite::PrimitiveC *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Create kernel failed, opParameter is nullptr, type: PrimitiveType_DetectionPostProcess. ";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_DetectionPostProcess);
  auto *dpp_kernel = new (std::nothrow) DetectionPostProcessInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (dpp_kernel == nullptr) {
    // Allocation failed before the kernel took ownership of opParameter.
    MS_LOG(ERROR) << "new DetectionPostProcessInt8CPUKernel fail!";
    free(opParameter);
    return nullptr;
  }
  if (dpp_kernel->Init() != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete dpp_kernel;
    return nullptr;
  }
  return dpp_kernel;
}
// Registers the creator above for int8 DetectionPostProcess on CPU.
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_DetectionPostProcess, CpuDetectionPostProcessInt8KernelCreator)
} // namespace mindspore::kernel

@ -0,0 +1,42 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DETECTION_POST_PROCESS_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DETECTION_POST_PROCESS_INT8_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/detection_post_process_base.h"
#include "nnacl/fp32/detection_post_process.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
// Int8 DetectionPostProcess kernel: reuses the base class's Init/ReSize/Run
// and replaces only input acquisition with a dequantizing version.
class DetectionPostProcessInt8CPUKernel : public DetectionPostProcessBaseCPUKernel {
public:
DetectionPostProcessInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: DetectionPostProcessBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~DetectionPostProcessInt8CPUKernel() = default;
private:
// NOTE(review): the base class declares GetInputData() non-virtual, so the
// inherited Run() would call the base fp32 version and never reach this
// override — confirm the base declaration is (made) virtual.
int GetInputData();
// Dequantizes `tensor` (int8) into a new fp32 buffer returned via *data;
// the buffer comes from context_->allocator — confirm who frees it.
int Dequantize(lite::Tensor *tensor, float **data);
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DETECTION_POST_PROCESS_INT8_H_
Loading…
Cancel
Save