From 900dfe5cbac952c2253604f2c7808a1b3629507d Mon Sep 17 00:00:00 2001
From: liuwenhao4
Date: Mon, 28 Sep 2020 19:09:55 +0800
Subject: [PATCH] Add crop fp16 ops

---
 mindspore/lite/nnacl/fp16/crop_fp16.c         | 153 ++++++++++++++++++
 mindspore/lite/nnacl/fp16/crop_fp16.h         |  36 +++++
 .../src/runtime/kernel/arm/base/crop_base.cc  |  52 ++++++
 .../src/runtime/kernel/arm/base/crop_base.h   |   9 +-
 .../src/runtime/kernel/arm/fp16/crop_fp16.cc  | 130 +++++++++++++++
 .../src/runtime/kernel/arm/fp16/crop_fp16.h   |  52 ++++++
 .../src/runtime/kernel/arm/int8/crop_int8.cc  |  60 +------
 7 files changed, 431 insertions(+), 61 deletions(-)
 create mode 100644 mindspore/lite/nnacl/fp16/crop_fp16.c
 create mode 100644 mindspore/lite/nnacl/fp16/crop_fp16.h
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
 create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.h

diff --git a/mindspore/lite/nnacl/fp16/crop_fp16.c b/mindspore/lite/nnacl/fp16/crop_fp16.c
new file mode 100644
index 0000000000..4df3399162
--- /dev/null
+++ b/mindspore/lite/nnacl/fp16/crop_fp16.c
@@ -0,0 +1,153 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "nnacl/fp16/crop_fp16.h"
+
+#include <string.h>
+
+#include "nnacl/crop_parameter.h"
+
+void Crop(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
+  int input_dim = para->input_dim_;
+  switch (input_dim) {
+    case 1:
+      Crop1D(input, output, task_id, para);
+      break;
+    case 2:
+      Crop2D(input, output, task_id, para);
+      break;
+    case 3:
+      Crop3D(input, output, task_id, para);
+      break;
+    case 4:
+      Crop4D(input, output, task_id, para);
+      break;
+  }
+}
+
+void Crop1D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
+  const int out_batch = para->out_shape_[0];
+  const int thread_count = para->thread_count_;
+  int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_batch, thread_count) : out_batch;
+  if (task_id_stride <= 0) {
+    return;
+  }
+  int n = task_id * task_id_stride;
+  if (n >= out_batch) {
+    return;
+  }
+  const float16_t *in_ptr = input + n + para->in_offset_[0];
+  float16_t *out_ptr = output + n;
+  int64_t out_dist_stride = MSMIN(out_batch - task_id * task_id_stride, task_id_stride);
+  memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_dist_stride);
+}
+
+void Crop2D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
+  const int in_height = para->in_shape_[1];
+  const int out_batch = para->out_shape_[0];
+  const int out_height = para->out_shape_[1];
+  const int thread_count = para->thread_count_;
+  int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height;
+  if (task_id_stride <= 0) {
+    return;
+  }
+
+  for (int n = 0; n < out_batch; n++) {
+    int h = task_id * task_id_stride;
+    if (h >= out_height) {
+      return;
+    }
+    const float16_t *in_ptr = input + (n + para->in_offset_[0]) * in_height + h + para->in_offset_[1];
+    float16_t *out_ptr = output + n * out_height + h;
+    int64_t out_dist_stride = MSMIN(out_height - task_id * task_id_stride, task_id_stride);
+    memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_dist_stride);
+  }
+}
+
+void Crop3D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
+  const int in_height = para->in_shape_[1];
+  const int in_width = para->in_shape_[2];
+
+  const int out_batch = para->out_shape_[0];
+  const int out_height = para->out_shape_[1];
+  const int out_width = para->out_shape_[2];
+
+  const int thread_count = para->thread_count_;
+  int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height;
+  if (task_id_stride <= 0) {
+    return;
+  }
+
+  const int in_stride_h = in_width;
+  const int in_stride_n = in_stride_h * in_height;
+
+  const int out_stride_h = out_width;
+  const int out_stride_n = out_stride_h * out_height;
+
+  for (int n = 0; n < out_batch; n++) {
+    for (int t = 0; t < task_id_stride; t++) {
+      int h = t + task_id * task_id_stride;
+      if (h >= out_height) {
+        break;
+      }
+      const float16_t *in_ptr =
+        input + (n + para->in_offset_[0]) * in_stride_n + (h + para->in_offset_[1]) * in_stride_h + para->in_offset_[2];
+      float16_t *out_ptr = output + n * out_stride_n + h * out_stride_h;
+      memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_width);
+    }
+  }
+}
+
+void Crop4D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
+  const int in_height = para->in_shape_[1];
+  const int in_width = para->in_shape_[2];
+  const int in_channel = para->in_shape_[3];
+
+  const int out_batch = para->out_shape_[0];
+  const int out_height = para->out_shape_[1];
+  const int out_width = para->out_shape_[2];
+  const int out_channel = para->out_shape_[3];
+
+  const int thread_count = para->thread_count_;
+  int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height;
+  if (task_id_stride <= 0) {
+    return;
+  }
+
+  const int in_stride_w = in_channel;
+  const int in_stride_h = in_channel * in_width;
+  const int in_stride_n = in_stride_h * in_height;
+
+  const int out_stride_w = out_channel;
+  const int out_stride_h = out_channel * out_width;
+  const int out_stride_n = out_stride_h * out_height;
+
+  for (int n = 0; n < out_batch; n++) {
+    for (int t = 0; t < task_id_stride; t++) {
+      int h = t + task_id * task_id_stride;
+      if (h >= out_height) {
+        break;
+      }
+      for (int w = 0; w < out_width; w++) {
+        const float16_t *in_ptr = input + (n + para->in_offset_[0]) * in_stride_n +
+                                  (h + para->in_offset_[1]) * in_stride_h + (w + para->in_offset_[2]) * in_stride_w +
+                                  para->in_offset_[3];
+        float16_t *out_ptr = output + n * out_stride_n + h * out_stride_h + w * out_stride_w;
+        memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_channel);
+      }
+    }
+  }
+}
diff --git a/mindspore/lite/nnacl/fp16/crop_fp16.h b/mindspore/lite/nnacl/fp16/crop_fp16.h
new file mode 100644
index 0000000000..a0f723d581
--- /dev/null
+++ b/mindspore/lite/nnacl/fp16/crop_fp16.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_NNACL_FP16_CROP_FP16_H_
+#define MINDSPORE_LITE_NNACL_FP16_CROP_FP16_H_
+
+#include <arm_neon.h>
+#include "nnacl/op_base.h"
+#include "nnacl/crop_parameter.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void Crop(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Crop1D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Crop2D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Crop3D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+void Crop4D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // MINDSPORE_LITE_NNACL_FP16_CROP_FP16_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc
index d0a2019a01..7274fade1a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.cc
@@ -54,6 +54,58 @@ kernel::LiteKernel *CpuCropInt8KernelCreator(const std::vector<lite::Tensor *> &
   return kernel;
 }
 
+int CropBaseCPUKernel::ReSize() {
+  auto *input_tensor = in_tensors_.at(kInputIndex);
+  auto input_shape = input_tensor->shape();
+  size_t input_dim = input_shape.size();
+
+  crop_para_->in_shape_ = reinterpret_cast<int *>(malloc(input_dim * sizeof(int)));
+  if (crop_para_->in_shape_ == nullptr) {
+    MS_LOG(ERROR) << "in_shape_ is nullptr";
+    return RET_ERROR;
+  } else {
+    memcpy(reinterpret_cast<void *>(const_cast<int *>(crop_para_->in_shape_)), input_shape.data(),
+           sizeof(int) * input_dim);
+  }
+
+  auto *out_tensor = out_tensors_.at(kOutputIndex);
+  auto output_shape = out_tensor->shape();
+  size_t output_dim = output_shape.size();
+
+  crop_para_->out_shape_ = reinterpret_cast<int *>(malloc(output_dim * sizeof(int)));
+  if (crop_para_->out_shape_ == nullptr) {
+    MS_LOG(ERROR) << "out_shape_ is nullptr";
+    return RET_ERROR;
+  } else {
+    memcpy(reinterpret_cast<void *>(const_cast<int *>(crop_para_->out_shape_)), output_shape.data(),
+           sizeof(int) * output_dim);
+  }
+  MS_ASSERT(input_dim <= CROP_OFFSET_MAX_SIZE);
+  crop_para_->input_dim_ = input_dim;
+  PadOffset(input_dim, crop_para_);
+  return RET_OK;
+}
+
+void CropBaseCPUKernel::PadOffset(int input_dim, CropParameter *crop_para) {
+  auto axis = crop_para->axis_;
+  auto offsets_size = crop_para->offset_size_;
+  MS_ASSERT(axis <= input_dim);
+  if (offsets_size > 1) {
+    MS_ASSERT(axis + offsets_size == input_dim);
+  }
+  for (int i = 0; i < input_dim; i++) {
+    int crop_offset = 0;
+    if (i >= axis) {
+      if (offsets_size == 1) {
+        crop_offset = crop_para->offset_[0];
+      } else if (offsets_size > 1) {
+        crop_offset = crop_para->offset_[i - axis];
+      }
+    }
+    crop_para->in_offset_[i] = crop_offset;
+  }
+}
+
 kernel::LiteKernel *CpuCropInt32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                               const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
                                               const InnerContext *ctx, const kernel::KernelKey &desc,
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
index ae28780343..df98443803 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/crop_base.h
@@ -29,15 +29,20 @@ class CropBaseCPUKernel : public LiteKernel {
   CropBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx,
                     const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {
+    crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_);
+    crop_para_->thread_count_ = op_parameter_->thread_num_;
+  }
   ~CropBaseCPUKernel() = default;
 
   int Init() override;
-  int ReSize() override { return 0; }
+  int ReSize() override;
   int Run() override { return 0; }
 
  protected:
+  CropParameter *crop_para_;
   int thread_count_;
+  void PadOffset(int input_dim, CropParameter *crop_para);
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
new file mode 100644
index 0000000000..e7189a085e
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
@@ -0,0 +1,130 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/runtime/kernel/arm/fp16/crop_fp16.h"
+
+#include "include/errorcode.h"
+#include "nnacl/crop_parameter.h"
+#include "nnacl/fp16/cast_fp16.h"
+#include "nnacl/fp16/crop_fp16.h"
+#include "src/kernel_registry.h"
+#include "src/runtime/kernel/arm/base/crop_base.h"
+#include "src/runtime/kernel/arm/fp16/common_fp16.h"
+#include "src/runtime/runtime_api.h"
+
+using mindspore::kernel::KERNEL_ARCH::kCPU;
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_Crop;
+
+namespace mindspore::kernel {
+
+int CropFp16CPUKernel::Init() {
+  auto ret = CropBaseCPUKernel::Init();
+  if (ret != RET_OK) {
+    return ret;
+  }
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int CropFp16CPUKernel::ReSize() { return CropBaseCPUKernel::ReSize(); }
+
+int CropFp16CPUKernel::DoExecute(int task_id) {
+  Crop(input_ptr_, output_ptr_, task_id, crop_para_);
+  return RET_OK;
+}
+
+static int CropRun(void *cdata, int task_id) {
+  auto g_kernel = reinterpret_cast<CropFp16CPUKernel *>(cdata);
+  auto ret = g_kernel->DoExecute(task_id);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "CropRun error task_id[" << task_id << "] error_code[" << ret << "]";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int CropFp16CPUKernel::Run() {
+  auto ret = Prepare();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Prepare failed.";
+    return RET_ERROR;
+  }
+  input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_);
+  if (input_ptr_ == nullptr) {
+    MS_LOG(ERROR) << "input or output is nullptr";
+    return RET_ERROR;
+  }
+
+  output_ptr_ = MallocOutputFp16(out_tensors_.at(kOutputIndex), context_);
+  if (output_ptr_ == nullptr) {
+    FreeInputAndOutput();
+    MS_LOG(ERROR) << "input or output is nullptr";
+    return RET_ERROR;
+  }
+
+  ret = ParallelLaunch(this->context_->thread_pool_, CropRun, this, thread_count_);
+
+  if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) {
+    Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()),
+                     out_tensors_.at(kOutputIndex)->ElementsNum());
+  }
+  FreeInputAndOutput();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Crop error error_code[" << ret << "]";
+  }
+  return ret;
+}
+
+void CropFp16CPUKernel::FreeInputAndOutput() {
+  if (in_tensors_.at(kInputIndex)->data_type() == kNumberTypeFloat32) {
+    context_->allocator->Free(input_ptr_);
+    input_ptr_ = nullptr;
+  }
+  if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) {
+    context_->allocator->Free(output_ptr_);
+    output_ptr_ = nullptr;
+  }
+}
+
+kernel::LiteKernel *CpuCropFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
+                                             const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
+                                             const InnerContext *ctx, const kernel::KernelKey &desc,
+                                             const mindspore::lite::PrimitiveC *primitive) {
+  if (opParameter == nullptr) {
+    MS_LOG(ERROR) << "Input opParameter is nullptr!";
+    return nullptr;
+  }
+  MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
+  auto *kernel = new (std::nothrow) CropFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "new CropFp16CPUKernel fail!";
+    return nullptr;
+  }
+  auto ret = kernel->Init();
+  if (ret != RET_OK) {
+    delete kernel;
+    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
+                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
+    return nullptr;
+  }
+  return kernel;
+}
+REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Crop, CpuCropFp16KernelCreator)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.h
new file mode 100644
index 0000000000..ad3facf6c2
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.h
@@ -0,0 +1,52 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CROP_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CROP_H_
+
+#include <arm_neon.h>
+
+#include <vector>
+
+#include "src/lite_kernel.h"
+#include "src/runtime/kernel/arm/base/crop_base.h"
+
+namespace mindspore::kernel {
+class CropFp16CPUKernel : public CropBaseCPUKernel {
+ public:
+  CropFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
+                    const mindspore::lite::PrimitiveC *primitive)
+      : CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {
+    crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_);
+    crop_para_->thread_count_ = op_parameter_->thread_num_;
+  }
+  ~CropFp16CPUKernel() override = default;
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int DoExecute(int task_id);
+
+ private:
+  float16_t *input_ptr_ = nullptr;
+  float16_t *output_ptr_ = nullptr;
+  CropParameter *crop_para_;
+  void FreeInputAndOutput();
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CROP_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
index 25f66da284..0d02ee04b7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
@@ -44,16 +44,6 @@ int CropInt8CPUKernel::Init() {
   crop_para_->quant_arg.output_activation_max_ = std::numeric_limits<int8_t>::max();
   crop_para_->quant_arg.output_activation_min_ = std::numeric_limits<int8_t>::min();
 
-  crop_para_->in_shape_ = reinterpret_cast<int *>(malloc(input_tensor->shape().size() * sizeof(int)));
-  if (crop_para_->in_shape_ == nullptr) {
-    MS_LOG(ERROR) << "malloc memory failed";
-    return RET_MEMORY_FAILED;
-  }
-  crop_para_->out_shape_ = reinterpret_cast<int *>(malloc(out_tensor->shape().size() * sizeof(int)));
-  if (crop_para_->out_shape_ == nullptr) {
-    MS_LOG(ERROR) << "malloc memory failed";
-    return RET_MEMORY_FAILED;
-  }
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -72,35 +62,7 @@ CropInt8CPUKernel::~CropInt8CPUKernel() {
   }
 }
 
-int CropInt8CPUKernel::ReSize() {
-  auto *input_tensor = in_tensors_.at(kInputIndex);
-  auto input_shape = input_tensor->shape();
-  size_t input_dim = input_shape.size();
-
-  if (crop_para_->in_shape_ == nullptr) {
-    MS_LOG(ERROR) << "in_shape_ is nullptr";
-    return RET_ERROR;
-  } else {
-    memcpy(reinterpret_cast<void *>(const_cast<int *>(crop_para_->in_shape_)), input_shape.data(),
-           sizeof(int) * input_dim);
-  }
-
-  auto *out_tensor = out_tensors_.at(kOutputIndex);
-  auto output_shape = out_tensor->shape();
-  size_t output_dim = output_shape.size();
-
-  if (crop_para_->out_shape_ == nullptr) {
-    MS_LOG(ERROR) << "out_shape_ is nullptr";
-    return RET_ERROR;
-  } else {
-    memcpy(reinterpret_cast<void *>(const_cast<int *>(crop_para_->out_shape_)), output_shape.data(),
-           sizeof(int) * output_dim);
-  }
-  MS_ASSERT(input_dim <= CROP_OFFSET_MAX_SIZE);
-  crop_para_->input_dim_ = input_dim;
-  PadOffset(input_dim, crop_para_);
-  return RET_OK;
-}
+int CropInt8CPUKernel::ReSize() { return CropBaseCPUKernel::ReSize(); }
 
 int CropInt8CPUKernel::Run() {
   auto ret = Prepare();
@@ -112,26 +74,6 @@ int CropInt8CPUKernel::Run() {
   return ret;
 }
 
-void PadOffset(int input_dim, CropParameter *crop_para) {
-  auto axis = crop_para->axis_;
-  auto offsets_size = crop_para->offset_size_;
-  MS_ASSERT(axis <= input_dim);
-  if (offsets_size > 1) {
-    MS_ASSERT(axis + offsets_size == input_dim);
-  }
-  for (int i = 0; i < input_dim; i++) {
-    int crop_offset = 0;
-    if (i >= axis) {
-      if (offsets_size == 1) {
-        crop_offset = crop_para->offset_[0];
-      } else if (offsets_size > 1) {
-        crop_offset = crop_para->offset_[i - axis];
-      }
-    }
-    crop_para->in_offset_[i] = crop_offset;
-  }
-}
-
 int CropInt8Run(void *cdata, int task_id) {
   auto crop = reinterpret_cast<CropInt8CPUKernel *>(cdata);
   crop->DoExecute(task_id);
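
As a reference for the review, the sketch below condenses the two pieces of logic this patch shares between the int8 and fp16 kernels: CropBaseCPUKernel::PadOffset(), which expands the Crop primitive's (axis, offsets) attributes into a per-dimension in_offset_, and the band-of-rows work split used by Crop1D/2D/3D/4D, where each task_id handles UP_DIV(out_height, thread_count) rows of the output. It is a minimal standalone illustration in plain C, not part of the patch: the helper names (PadOffsetSketch, Crop2DSketch) are invented for this note, float stands in for float16_t so it builds without <arm_neon.h>, and the serial loop over task_id stands in for ParallelLaunch.

#include <stdio.h>
#include <string.h>

#define UP_DIV(x, y) (((x) + (y) - 1) / (y))
#define MSMIN(a, b) ((a) < (b) ? (a) : (b))

/* Mirrors PadOffset(): dims before `axis` get offset 0; a single offset value
 * is broadcast to every cropped dim, otherwise offsets map one-to-one. */
static void PadOffsetSketch(int input_dim, int axis, const int *offset, int offset_size, int *in_offset) {
  for (int i = 0; i < input_dim; i++) {
    int crop_offset = 0;
    if (i >= axis) {
      crop_offset = (offset_size == 1) ? offset[0] : offset[i - axis];
    }
    in_offset[i] = crop_offset;
  }
}

/* Mirrors the Crop2D split: each task copies a contiguous band of the output,
 * task_id_stride elements wide, clamped at the end of each output row. */
static void Crop2DSketch(const float *in, float *out, const int in_shape[2], const int out_shape[2],
                         const int in_offset[2], int task_id, int thread_count) {
  const int in_h = in_shape[1];
  const int out_b = out_shape[0];
  const int out_h = out_shape[1];
  int stride = thread_count > 1 ? UP_DIV(out_h, thread_count) : out_h;
  if (stride <= 0) return;
  int h = task_id * stride;
  if (h >= out_h) return;
  int count = MSMIN(out_h - h, stride);
  for (int n = 0; n < out_b; n++) {
    const float *in_ptr = in + (n + in_offset[0]) * in_h + h + in_offset[1];
    float *out_ptr = out + n * out_h + h;
    memcpy(out_ptr, in_ptr, sizeof(float) * count);
  }
}

int main(void) {
  /* Crop a [1, 6] input to a [1, 4] output with axis = 1 and a single offset of 2. */
  const int in_shape[2] = {1, 6}, out_shape[2] = {1, 4}, offset[1] = {2};
  int in_offset[2] = {0, 0};
  PadOffsetSketch(2, 1, offset, 1, in_offset); /* in_offset becomes {0, 2} */

  float in[6] = {0, 1, 2, 3, 4, 5}, out[4] = {0};
  const int thread_count = 2; /* ParallelLaunch would run these task_ids concurrently */
  for (int task_id = 0; task_id < thread_count; task_id++) {
    Crop2DSketch(in, out, in_shape, out_shape, in_offset, task_id, thread_count);
  }
  for (int i = 0; i < 4; i++) {
    printf("%g ", out[i]); /* prints: 2 3 4 5 */
  }
  printf("\n");
  return 0;
}

The split over the outermost cropped rows keeps each memcpy contiguous, which is why the same base-class offset logic can be reused unchanged by the int8, fp32 and fp16 kernels.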