parent
8f55187492
commit
900dfe5cba
@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp16/crop_fp16.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "nnacl/crop_parameter.h"
|
||||
|
||||
void Crop(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
|
||||
int input_dim = para->input_dim_;
|
||||
switch (input_dim) {
|
||||
case 1:
|
||||
Crop1D(input, output, task_id, para);
|
||||
break;
|
||||
case 2:
|
||||
Crop2D(input, output, task_id, para);
|
||||
break;
|
||||
case 3:
|
||||
Crop3D(input, output, task_id, para);
|
||||
break;
|
||||
case 4:
|
||||
Crop4D(input, output, task_id, para);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Crop1D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
|
||||
const int out_batch = para->out_shape_[0];
|
||||
const int thread_count = para->thread_count_;
|
||||
int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_batch, thread_count) : out_batch;
|
||||
if (task_id_stride <= 0) {
|
||||
return;
|
||||
}
|
||||
int n = task_id * task_id_stride;
|
||||
if (n >= out_batch) {
|
||||
return;
|
||||
}
|
||||
const float16_t *in_ptr = input + n + para->in_offset_[0];
|
||||
float16_t *out_ptr = output + n;
|
||||
int64_t out_dist_stride = MSMIN(out_batch - task_id * task_id_stride, task_id_stride);
|
||||
memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_dist_stride);
|
||||
}
|
||||
|
||||
void Crop2D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
|
||||
const int in_height = para->in_shape_[1];
|
||||
const int out_batch = para->out_shape_[0];
|
||||
const int out_height = para->out_shape_[1];
|
||||
const int thread_count = para->thread_count_;
|
||||
int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height;
|
||||
if (task_id_stride <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int n = 0; n < out_batch; n++) {
|
||||
int h = task_id * task_id_stride;
|
||||
if (h >= out_height) {
|
||||
return;
|
||||
}
|
||||
const float16_t *in_ptr = input + (n + para->in_offset_[0]) * in_height + h + para->in_offset_[1];
|
||||
float16_t *out_ptr = output + n * out_height + h;
|
||||
int64_t out_dist_stride = MSMIN(out_height - task_id * task_id_stride, task_id_stride);
|
||||
memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_dist_stride);
|
||||
}
|
||||
}
|
||||
|
||||
void Crop3D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
|
||||
const int in_height = para->in_shape_[1];
|
||||
const int in_width = para->in_shape_[2];
|
||||
|
||||
const int out_batch = para->out_shape_[0];
|
||||
const int out_height = para->out_shape_[1];
|
||||
const int out_width = para->out_shape_[2];
|
||||
|
||||
const int thread_count = para->thread_count_;
|
||||
int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height;
|
||||
if (task_id_stride <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int in_stride_h = in_width;
|
||||
const int in_stride_n = in_stride_h * in_height;
|
||||
|
||||
const int out_stride_h = out_width;
|
||||
const int out_stride_n = out_stride_h * out_height;
|
||||
|
||||
for (int n = 0; n < out_batch; n++) {
|
||||
for (int t = 0; t < task_id_stride; t++) {
|
||||
int h = t + task_id * task_id_stride;
|
||||
if (h >= out_height) {
|
||||
break;
|
||||
}
|
||||
const float16_t *in_ptr =
|
||||
input + (n + para->in_offset_[0]) * in_stride_n + (h + para->in_offset_[1]) * in_stride_h + para->in_offset_[2];
|
||||
float16_t *out_ptr = output + n * out_stride_n + h * out_stride_h;
|
||||
memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_width);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Crop4D(const float16_t *input, float16_t *output, int task_id, CropParameter *para) {
|
||||
const int in_height = para->in_shape_[1];
|
||||
const int in_width = para->in_shape_[2];
|
||||
const int in_channel = para->in_shape_[3];
|
||||
|
||||
const int out_batch = para->out_shape_[0];
|
||||
const int out_height = para->out_shape_[1];
|
||||
const int out_width = para->out_shape_[2];
|
||||
const int out_channel = para->out_shape_[3];
|
||||
|
||||
const int thread_count = para->thread_count_;
|
||||
int64_t task_id_stride = thread_count > 1 ? UP_DIV(out_height, thread_count) : out_height;
|
||||
if (task_id_stride <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int in_stride_w = in_channel;
|
||||
const int in_stride_h = in_channel * in_width;
|
||||
const int in_stride_n = in_stride_h * in_height;
|
||||
|
||||
const int out_stride_w = out_channel;
|
||||
const int out_stride_h = out_channel * out_width;
|
||||
const int out_stride_n = out_stride_h * out_height;
|
||||
|
||||
for (int n = 0; n < out_batch; n++) {
|
||||
for (int t = 0; t < task_id_stride; t++) {
|
||||
int h = t + task_id * task_id_stride;
|
||||
if (h >= out_height) {
|
||||
break;
|
||||
}
|
||||
for (int w = 0; w < out_width; w++) {
|
||||
const float16_t *in_ptr = input + (n + para->in_offset_[0]) * in_stride_n +
|
||||
(h + para->in_offset_[1]) * in_stride_h + (w + para->in_offset_[2]) * in_stride_w +
|
||||
para->in_offset_[3];
|
||||
float16_t *out_ptr = output + n * out_stride_n + h * out_stride_h + w * out_stride_w;
|
||||
memcpy(out_ptr, in_ptr, sizeof(float16_t) * out_channel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_NNACL_FP16_CROP_FP16_H_
|
||||
#define MINDSPORE_LITE_NNACL_FP16_CROP_FP16_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/crop_parameter.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void Crop(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
|
||||
void Crop1D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
|
||||
void Crop2D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
|
||||
void Crop3D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
|
||||
void Crop4D(const float16_t *input, float16_t *output, int task_id, CropParameter *para);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_FP16_CROP_FP16_H_
|
@ -0,0 +1,130 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/runtime/kernel/arm/fp16/crop_fp16.h"
|
||||
|
||||
#include "include/errorcode.h"
|
||||
#include "nnacl/crop_parameter.h"
|
||||
#include "nnacl/fp16/cast_fp16.h"
|
||||
#include "nnacl/fp16/crop_fp16.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "src/runtime/kernel/arm/base/crop_base.h"
|
||||
#include "src/runtime/kernel/arm/fp16/common_fp16.h"
|
||||
#include "src/runtime/runtime_api.h"
|
||||
|
||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Crop;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
|
||||
int CropFp16CPUKernel::Init() {
|
||||
auto ret = CropBaseCPUKernel::Init();
|
||||
if (ret != RET_OK) {
|
||||
return ret;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int CropFp16CPUKernel::ReSize() { return CropBaseCPUKernel::ReSize(); }
|
||||
|
||||
int CropFp16CPUKernel::DoExecute(int task_id) {
|
||||
Crop(input_ptr_, output_ptr_, task_id, crop_para_);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
static int CropRun(void *cdata, int task_id) {
|
||||
auto g_kernel = reinterpret_cast<CropFp16CPUKernel *>(cdata);
|
||||
auto ret = g_kernel->DoExecute(task_id);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "CropRun error task_id[" << task_id << "] error_code[" << ret << "]";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int CropFp16CPUKernel::Run() {
|
||||
auto ret = Prepare();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_);
|
||||
if (input_ptr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "input or output is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
output_ptr_ = MallocOutputFp16(out_tensors_.at(kOutputIndex), context_);
|
||||
if (output_ptr_ == nullptr) {
|
||||
FreeInputAndOutput();
|
||||
MS_LOG(ERROR) << "input or output is nullptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = ParallelLaunch(this->context_->thread_pool_, CropRun, this, thread_count_);
|
||||
|
||||
if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) {
|
||||
Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()),
|
||||
out_tensors_.at(kOutputIndex)->ElementsNum());
|
||||
}
|
||||
FreeInputAndOutput();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Crop error error_code[" << ret << "]";
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void CropFp16CPUKernel::FreeInputAndOutput() {
|
||||
if (in_tensors_.at(kInputIndex)->data_type() == kNumberTypeFloat32) {
|
||||
context_->allocator->Free(input_ptr_);
|
||||
input_ptr_ = nullptr;
|
||||
}
|
||||
if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) {
|
||||
context_->allocator->Free(output_ptr_);
|
||||
output_ptr_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
kernel::LiteKernel *CpuCropFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
|
||||
const InnerContext *ctx, const kernel::KernelKey &desc,
|
||||
const mindspore::lite::PrimitiveC *primitive) {
|
||||
if (opParameter == nullptr) {
|
||||
MS_LOG(ERROR) << "Input opParameter is nullptr!";
|
||||
return nullptr;
|
||||
}
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_Crop);
|
||||
auto *kernel = new (std::nothrow) CropFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "new CropFp16CPUKernel fail!";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
delete kernel;
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Crop, CpuCropFp16KernelCreator)
|
||||
} // namespace mindspore::kernel
|
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CROP_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CROP_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "src/lite_kernel.h"
|
||||
#include "src/runtime/kernel/arm/base/crop_base.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class CropFp16CPUKernel : public CropBaseCPUKernel {
|
||||
public:
|
||||
CropFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
|
||||
const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: CropBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {
|
||||
crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_);
|
||||
crop_para_->thread_count_ = op_parameter_->thread_num_;
|
||||
}
|
||||
~CropFp16CPUKernel() override = default;
|
||||
|
||||
int Init() override;
|
||||
int ReSize() override;
|
||||
int Run() override;
|
||||
int DoExecute(int task_id);
|
||||
|
||||
private:
|
||||
float16_t *input_ptr_ = nullptr;
|
||||
float16_t *output_ptr_ = nullptr;
|
||||
CropParameter *crop_para_;
|
||||
void FreeInputAndOutput();
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CROP_H_
|
Loading…
Reference in new issue