!7654 add detection_post_process_int8 kernel & add layernorm primitive registration

Merge pull request !7654 from wangzhe/master
pull/7654/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 7f209bdf12

@ -182,13 +182,13 @@ int DetectionPostProcess::InferShape(std::vector<lite::Tensor *> inputs_, std::v
MS_ASSERT(num_det != nullptr);
detected_boxes->SetFormat(boxes->GetFormat());
detected_boxes->set_data_type(boxes->data_type());
detected_boxes->set_data_type(kNumberTypeFloat32);
detected_classes->SetFormat(boxes->GetFormat());
detected_classes->set_data_type(boxes->data_type());
detected_classes->set_data_type(kNumberTypeFloat32);
detected_scores->SetFormat(boxes->GetFormat());
detected_scores->set_data_type(boxes->data_type());
detected_scores->set_data_type(kNumberTypeFloat32);
num_det->SetFormat(boxes->GetFormat());
num_det->set_data_type(boxes->data_type());
num_det->set_data_type(kNumberTypeFloat32);
if (!GetInferFlag()) {
return RET_OK;
}

@ -15,6 +15,10 @@
*/
#include "src/ops/layer_norm.h"
#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"
#endif
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
@ -59,6 +63,10 @@ std::vector<int> LayerNorm::GetNormalizedShape() const {
}
// Reads epsilon straight from the flatbuffer-backed LayerNorm attribute table.
float LayerNorm::GetEpsilon() const { return this->primitive_->value_as_LayerNorm()->epsilon(); }
// Reads the elementwiseAffine flag from the same flatbuffer attribute table.
bool LayerNorm::GetElementwiseAffine() const { return this->primitive_->value_as_LayerNorm()->elementwiseAffine(); }
// Factory: builds a LayerNorm PrimitiveC wrapper from a raw schema::Primitive.
// NOTE(review): assumes value_as_LayerNorm() is non-null for a well-formed
// model; no null check is performed here — confirm upstream validation.
PrimitiveC *LayerNormCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<LayerNorm>(primitive);
}
// File-scope registration object: binds PrimitiveType_LayerNorm to the
// creator above at static-initialization time.
Registry LayerNormRegistry(schema::PrimitiveType_LayerNorm, LayerNormCreator);
#endif
int LayerNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {

@ -0,0 +1,137 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/base/detection_post_process_base.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/int8/quant_dtype_cast.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DetectionPostProcess;
namespace mindspore::kernel {
int DetectionPostProcessBaseCPUKernel::Init() {
auto anchor_tensor = in_tensors_.at(2);
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->anchors_ = nullptr;
if (anchor_tensor->data_type() == kNumberTypeUInt8) {
const auto quant_params = anchor_tensor->GetQuantParams();
const double scale = quant_params.at(0).scale;
const int32_t zp = quant_params.at(0).zeroPoint;
auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->MutableData());
auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (anchor_fp32 == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
for (int i = 0; i < anchor_tensor->ElementsNum(); ++i) {
*(anchor_fp32 + i) = static_cast<float>((static_cast<int>(anchor_uint8[i]) - zp) * scale);
}
parameter->anchors_ = anchor_fp32;
} else if (anchor_tensor->data_type() == kNumberTypeFloat32) {
parameter->anchors_ = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (parameter->anchors_ == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
memcpy(parameter->anchors_, anchor_tensor->MutableData(), anchor_tensor->Size());
} else {
MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type();
return RET_ERROR;
}
return RET_OK;
}
// Destructor: releases the fp32 anchor buffer allocated in Init().
// delete[] on nullptr is a no-op, so an un-initialized kernel is safe.
DetectionPostProcessBaseCPUKernel::~DetectionPostProcessBaseCPUKernel() {
  auto *param = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
  delete[] param->anchors_;
}
// ReSize: no shape-dependent state is cached (scratch buffers are sized per
// Run() call), so resizing is a no-op.
int DetectionPostProcessBaseCPUKernel::ReSize() { return RET_OK; }
// GetInputData: fp32 path — validates both inputs are float tensors and
// borrows their backing buffers (no copy, no ownership transfer).
int DetectionPostProcessBaseCPUKernel::GetInputData() {
  const auto boxes_type = in_tensors_.at(0)->data_type();
  const auto scores_type = in_tensors_.at(1)->data_type();
  const bool boxes_ok = boxes_type == kNumberTypeFloat32 || boxes_type == kNumberTypeFloat;
  const bool scores_ok = scores_type == kNumberTypeFloat32 || scores_type == kNumberTypeFloat;
  if (!boxes_ok || !scores_ok) {
    MS_LOG(ERROR) << "Input data type error";
    return RET_ERROR;
  }
  input_boxes = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
  input_scores = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
  return RET_OK;
}
int DetectionPostProcessBaseCPUKernel::Run() {
MS_ASSERT(context_->allocator != nullptr);
int status = GetInputData();
if (status != RET_OK) {
return status;
}
auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->MutableData());
auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->MutableData());
const int num_boxes = in_tensors_.at(0)->shape()[1];
const int num_classes_with_bg = in_tensors_.at(1)->shape()[2];
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->decoded_boxes_ = context_->allocator->Malloc(num_boxes * 4 * sizeof(float));
parameter->nms_candidate_ = context_->allocator->Malloc(num_boxes * sizeof(uint8_t));
parameter->selected_ = context_->allocator->Malloc(num_boxes * sizeof(int));
parameter->score_with_class_ = context_->allocator->Malloc(num_boxes * sizeof(ScoreWithIndex));
if (!parameter->decoded_boxes_ || !parameter->nms_candidate_ || !parameter->selected_ ||
!parameter->score_with_class_) {
MS_LOG(ERROR) << "malloc parameter->decoded_boxes_ || parameter->nms_candidate_ || parameter->selected_ || "
"parameter->score_with_class_ failed.";
return RET_ERROR;
}
if (parameter->use_regular_nms_) {
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex));
parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int));
if (!parameter->score_with_class_all_ || !parameter->indexes_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ || parameter->indexes_ failed.";
return RET_ERROR;
}
} else {
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes * parameter->num_classes_) * sizeof(ScoreWithIndex));
if (!parameter->score_with_class_all_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ failed.";
return RET_ERROR;
}
}
DetectionPostProcess(num_boxes, num_classes_with_bg, input_boxes, input_scores, parameter->anchors_, output_boxes,
output_classes, output_scores, output_num, parameter);
context_->allocator->Free(parameter->decoded_boxes_);
parameter->decoded_boxes_ = nullptr;
context_->allocator->Free(parameter->nms_candidate_);
parameter->nms_candidate_ = nullptr;
context_->allocator->Free(parameter->selected_);
parameter->selected_ = nullptr;
context_->allocator->Free(parameter->score_with_class_);
parameter->score_with_class_ = nullptr;
context_->allocator->Free(parameter->score_with_class_all_);
parameter->score_with_class_all_ = nullptr;
if (parameter->use_regular_nms_) {
context_->allocator->Free(parameter->indexes_);
parameter->indexes_ = nullptr;
}
return RET_OK;
}
} // namespace mindspore::kernel

@ -0,0 +1,47 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "nnacl/fp32/detection_post_process.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
// Shared base for the fp32 and int8 DetectionPostProcess CPU kernels.
// Subclasses customize only how the two inputs are turned into fp32 buffers
// (GetInputData); decoding/NMS in Run() is common.
class DetectionPostProcessBaseCPUKernel : public LiteKernel {
 public:
  DetectionPostProcessBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                    const mindspore::lite::PrimitiveC *primitive)
      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
  virtual ~DetectionPostProcessBaseCPUKernel();

  int Init() override;
  int ReSize() override;
  int Run() override;

 protected:
  // fp32 views of the two inputs, filled by GetInputData() and consumed by
  // Run(). Initialized to nullptr so an un-run kernel never holds garbage.
  float *input_boxes = nullptr;
  float *input_scores = nullptr;
  // Fix: must be virtual — Run() calls GetInputData() and the int8 subclass
  // declares its own dequantizing version. A non-virtual declaration here
  // would be hidden, so the base fp32 type check would always reject the
  // int8 kernel's inputs.
  virtual int GetInputData();
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_BASE_H_

@ -18,105 +18,14 @@
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/int8/quant_dtype_cast.h"
using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DetectionPostProcess;
namespace mindspore::kernel {
// Pre-refactor fp32 Init (removed by this commit in favor of the shared base
// kernel): converts the anchor tensor (input 2) into an owned fp32 buffer.
int DetectionPostProcessCPUKernel::Init() {
auto anchor_tensor = in_tensors_.at(2);
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->anchors_ = nullptr;
if (anchor_tensor->data_type() == kNumberTypeUInt8) {
// NOTE(review): quant_params.at(0) is read without checking the vector is
// non-empty — a uint8 anchor tensor without quant params would throw.
const auto quant_params = anchor_tensor->GetQuantParams();
const double scale = quant_params.at(0).scale;
const int32_t zp = quant_params.at(0).zeroPoint;
auto anchor_uint8 = reinterpret_cast<uint8_t *>(anchor_tensor->MutableData());
auto anchor_fp32 = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (anchor_fp32 == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
// Per-tensor affine dequantization: fp32 = (u8 - zero_point) * scale.
for (int i = 0; i < anchor_tensor->ElementsNum(); ++i) {
*(anchor_fp32 + i) = static_cast<float>((static_cast<int>(anchor_uint8[i]) - zp) * scale);
}
parameter->anchors_ = anchor_fp32;
} else if (anchor_tensor->data_type() == kNumberTypeFloat32) {
parameter->anchors_ = new (std::nothrow) float[anchor_tensor->ElementsNum()];
if (parameter->anchors_ == nullptr) {
MS_LOG(ERROR) << "Malloc anchor failed";
return RET_ERROR;
}
// Size() is the tensor's byte size, so this copies every fp32 element.
memcpy(parameter->anchors_, anchor_tensor->MutableData(), anchor_tensor->Size());
} else {
MS_LOG(ERROR) << "unsupported anchor data type " << anchor_tensor->data_type();
return RET_ERROR;
}
return RET_OK;
}
// Pre-refactor fp32 destructor (removed by this commit): frees the anchor
// buffer allocated in Init(); delete[] on nullptr is a no-op.
DetectionPostProcessCPUKernel::~DetectionPostProcessCPUKernel() {
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
delete[](parameter->anchors_);
}
// Pre-refactor fp32 ReSize (removed by this commit): no cached shape state.
int DetectionPostProcessCPUKernel::ReSize() { return RET_OK; }
// Pre-refactor fp32 Run (removed by this commit in favor of the shared base
// Run): decodes boxes against the anchors, applies NMS, writes the four
// fp32 outputs. Scratch buffers come from the context allocator.
int DetectionPostProcessCPUKernel::Run() {
auto input_boxes = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
auto input_scores = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
// output_classes and output_num use float type now
auto output_boxes = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
auto output_classes = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
auto output_scores = reinterpret_cast<float *>(out_tensors_.at(2)->MutableData());
auto output_num = reinterpret_cast<float *>(out_tensors_.at(3)->MutableData());
MS_ASSERT(context_->allocator != nullptr);
const int num_boxes = in_tensors_.at(0)->shape()[1];
const int num_classes_with_bg = in_tensors_.at(1)->shape()[2];
DetectionPostProcessParameter *parameter = reinterpret_cast<DetectionPostProcessParameter *>(op_parameter_);
parameter->decoded_boxes_ = context_->allocator->Malloc(num_boxes * 4 * sizeof(float));
parameter->nms_candidate_ = context_->allocator->Malloc(num_boxes * sizeof(uint8_t));
parameter->selected_ = context_->allocator->Malloc(num_boxes * sizeof(int));
parameter->score_with_class_ = context_->allocator->Malloc(num_boxes * sizeof(ScoreWithIndex));
// NOTE(review): every error return below leaks the buffers already
// allocated above, and no freed pointer is nulled afterwards — the shared
// base-class Run() that replaces this code inherits the same pattern.
if (!parameter->decoded_boxes_ || !parameter->nms_candidate_ || !parameter->selected_ ||
!parameter->score_with_class_) {
MS_LOG(ERROR) << "malloc parameter->decoded_boxes_ || parameter->nms_candidate_ || parameter->selected_ || "
"parameter->score_with_class_ failed.";
return RET_ERROR;
}
if (parameter->use_regular_nms_) {
// Regular NMS needs room for the kept detections as well.
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(ScoreWithIndex));
parameter->indexes_ = context_->allocator->Malloc((num_boxes + parameter->max_detections_) * sizeof(int));
if (!parameter->score_with_class_all_ || !parameter->indexes_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ || parameter->indexes_ failed.";
return RET_ERROR;
}
} else {
parameter->score_with_class_all_ =
context_->allocator->Malloc((num_boxes * parameter->num_classes_) * sizeof(ScoreWithIndex));
if (!parameter->score_with_class_all_) {
MS_LOG(ERROR) << "malloc parameter->score_with_class_all_ failed.";
return RET_ERROR;
}
}
DetectionPostProcess(num_boxes, num_classes_with_bg, input_boxes, input_scores, parameter->anchors_, output_boxes,
output_classes, output_scores, output_num, parameter);
context_->allocator->Free(parameter->decoded_boxes_);
context_->allocator->Free(parameter->nms_candidate_);
context_->allocator->Free(parameter->selected_);
context_->allocator->Free(parameter->score_with_class_);
context_->allocator->Free(parameter->score_with_class_all_);
// indexes_ is only allocated on the regular-NMS branch, so it is only
// freed there.
if (parameter->use_regular_nms_) {
context_->allocator->Free(parameter->indexes_);
}
return RET_OK;
}
kernel::LiteKernel *CpuDetectionPostProcessFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs,

@ -20,22 +20,19 @@
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/detection_post_process_base.h"
#include "nnacl/fp32/detection_post_process.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
class DetectionPostProcessCPUKernel : public LiteKernel {
class DetectionPostProcessCPUKernel : public DetectionPostProcessBaseCPUKernel {
public:
DetectionPostProcessCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
~DetectionPostProcessCPUKernel() override;
int Init() override;
int ReSize() override;
int Run() override;
: DetectionPostProcessBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~DetectionPostProcessCPUKernel() = default;
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DETECTION_POST_PROCESS_H_

@ -0,0 +1,88 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/runtime/kernel/arm/int8/detection_post_process_int8.h"
#include <vector>
#include "schema/model_generated.h"
#include "src/kernel_registry.h"
#include "include/errorcode.h"
#include "nnacl/int8/quant_dtype_cast.h"
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_DetectionPostProcess;
namespace mindspore::kernel {
// Dequantize: converts an int8 tensor into a freshly allocated fp32 buffer
// (*data) using its first quant param (per-tensor scale/zero-point).
// The caller owns the buffer and must return it to context_->allocator.
// Returns RET_ERROR on missing quant params or allocation failure.
int DetectionPostProcessInt8CPUKernel::Dequantize(lite::Tensor *tensor, float **data) {
  // Fix: validate quant params BEFORE allocating — the original Malloc'ed
  // *data first and leaked it on the "null quant param" error path.
  if (tensor->GetQuantParams().empty()) {
    MS_LOG(ERROR) << "null quant param";
    return RET_ERROR;
  }
  auto data_int8 = reinterpret_cast<int8_t *>(tensor->MutableData());
  *data = reinterpret_cast<float *>(context_->allocator->Malloc(tensor->ElementsNum() * sizeof(float)));
  if (*data == nullptr) {
    MS_LOG(ERROR) << "Malloc data failed.";
    return RET_ERROR;
  }
  auto quant_param = tensor->GetQuantParams().front();
  DoDequantizeInt8ToFp32(data_int8, *data, quant_param.scale, quant_param.zeroPoint, tensor->ElementsNum());
  return RET_OK;
}
int DetectionPostProcessInt8CPUKernel::GetInputData() {
if (in_tensors_.at(0)->data_type() != kNumberTypeInt8 || in_tensors_.at(1)->data_type() != kNumberTypeInt8) {
MS_LOG(ERROR) << "Input data type error";
return RET_ERROR;
}
int status = Dequantize(in_tensors_.at(0), &input_boxes);
if (status != RET_OK) {
return status;
}
status = Dequantize(in_tensors_.at(1), &input_scores);
if (status != RET_OK) {
return status;
}
return RET_OK;
}
// Registry factory for the int8 DetectionPostProcess kernel: validates the
// parameter, constructs and Init()s the kernel, and returns it ready to run.
// Returns nullptr (after cleanup) on any failure.
kernel::LiteKernel *CpuDetectionPostProcessInt8KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                             const std::vector<lite::Tensor *> &outputs,
                                                             OpParameter *opParameter, const lite::InnerContext *ctx,
                                                             const kernel::KernelKey &desc,
                                                             const mindspore::lite::PrimitiveC *primitive) {
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "Create kernel failed, opParameter is nullptr, type: PrimitiveType_DetectionPostProcess. ";
    return nullptr;
  }
  MS_ASSERT(desc.type == schema::PrimitiveType_DetectionPostProcess);
  auto *dpp_kernel = new (std::nothrow) DetectionPostProcessInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (dpp_kernel == nullptr) {
    // Allocation failed before the kernel took ownership of opParameter.
    MS_LOG(ERROR) << "new DetectionPostProcessInt8CPUKernel fail!";
    free(opParameter);
    return nullptr;
  }
  if (dpp_kernel->Init() != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete dpp_kernel;
    return nullptr;
  }
  return dpp_kernel;
}
// Registers the creator above for int8 DetectionPostProcess on CPU.
REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_DetectionPostProcess, CpuDetectionPostProcessInt8KernelCreator)
} // namespace mindspore::kernel

@ -0,0 +1,42 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DETECTION_POST_PROCESS_INT8_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DETECTION_POST_PROCESS_INT8_H_
#include <vector>
#include "src/lite_kernel.h"
#include "include/context.h"
#include "src/runtime/kernel/arm/base/detection_post_process_base.h"
#include "nnacl/fp32/detection_post_process.h"
using mindspore::lite::InnerContext;
namespace mindspore::kernel {
// Int8 DetectionPostProcess kernel: reuses the base class's Init/ReSize/Run
// and replaces only input acquisition with a dequantizing version.
class DetectionPostProcessInt8CPUKernel : public DetectionPostProcessBaseCPUKernel {
public:
DetectionPostProcessInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
const mindspore::lite::PrimitiveC *primitive)
: DetectionPostProcessBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
~DetectionPostProcessInt8CPUKernel() = default;
private:
// NOTE(review): the base class declares GetInputData() non-virtual, so the
// inherited Run() would call the base fp32 version and never reach this
// override — confirm the base declaration is (made) virtual.
int GetInputData();
// Dequantizes `tensor` (int8) into a new fp32 buffer returned via *data;
// the buffer comes from context_->allocator — confirm who frees it.
int Dequantize(lite::Tensor *tensor, float **data);
};
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_DETECTION_POST_PROCESS_INT8_H_
Loading…
Cancel
Save