!5890 [MSLITE][Develop] add pad fp16 kernel
Merge pull request !5890 from sunsuodong/pad_fp16
commit
77cc155e39
nnacl/fp16/pad_fp16.c
@ -0,0 +1,35 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "nnacl/fp16/pad_fp16.h"
#include <string.h>  // memcpy, in case common_func.h does not pull it in
#include "nnacl/common_func.h"

void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
             const int *paddings, const int tid, const int thread_num) {
  // Copy each innermost (channel) run of the input to its padded position in
  // the output. paddings holds {before, after} pairs per dimension; only the
  // "before" entries (indices 0, 2, 4, 6) are needed to locate the interior.
  int in[4], out[4];
  for (in[0] = 0; in[0] < input_shape[0]; in[0]++) {
    out[0] = in[0] + paddings[0];
    // Dimension 1 is striped across threads: worker tid handles rows
    // tid, tid + thread_num, tid + 2 * thread_num, ...
    for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) {
      out[1] = in[1] + paddings[2];
      for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
        out[2] = in[2] + paddings[4];
        float16_t *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
        const float16_t *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
        memcpy(dst, src, input_shape[3] * sizeof(float16_t));
      }
    }
  }
}
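For reference, offset() comes from nnacl/common_func.h and flattens a 4-D coordinate into a linear index. The sketch below shows the row-major (NHWC) computation it is expected to perform; it is illustrative, not the verbatim library code.

// Illustrative only: row-major flattening of a 4-D coordinate, matching how
// PadFp16 above uses offset(). The real definition lives in
// nnacl/common_func.h and may differ in signature.
static inline int offset_sketch(const int *shape, int d0, int d1, int d2, int d3) {
  return ((d0 * shape[1] + d1) * shape[2] + d2) * shape[3] + d3;
}

With this layout, dst points at channel slot paddings[6] of the target output row, so the memcpy of input_shape[3] elements fills exactly the interior span of that row.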
nnacl/fp16/pad_fp16.h
@ -0,0 +1,32 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_NNACL_FP16_PAD_FP16_H_
#define MINDSPORE_LITE_NNACL_FP16_PAD_FP16_H_

#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif
void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
             const int *paddings, const int tid, const int thread_num);
#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_NNACL_FP16_PAD_FP16_H_
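To make the contract concrete, here is a hypothetical single-threaded driver (tid = 0, thread_num = 1) that pads a 1x2x2x1 tensor to 1x4x4x1. It assumes an aarch64 toolchain where float16_t comes from <arm_neon.h>, and it zero-fills the output first because PadFp16 writes only the interior.

// Hypothetical usage sketch, not part of the commit.
#include <arm_neon.h>
#include <stdio.h>
#include <string.h>
#include "nnacl/fp16/pad_fp16.h"

int main(void) {
  int in_shape[4] = {1, 2, 2, 1};
  int out_shape[4] = {1, 4, 4, 1};
  // {before, after} per dimension: N, H, W, C -> one zero row/column around H and W.
  int paddings[8] = {0, 0, 1, 1, 1, 1, 0, 0};
  float16_t input[4] = {1, 2, 3, 4};
  float16_t output[16];
  memset(output, 0, sizeof(output));  // the kernel only writes the interior
  PadFp16(input, output, in_shape, out_shape, paddings, 0, 1);
  for (int h = 0; h < 4; ++h) {
    for (int w = 0; w < 4; ++w) {
      printf("%4.1f ", (float)output[h * 4 + w]);
    }
    printf("\n");
  }
  return 0;
}

Expected output is a 4x4 grid of zeros with 1 2 / 3 4 in the centre.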
src/runtime/kernel/arm/fp16/pad_fp16.cc
@ -0,0 +1,101 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/runtime/kernel/arm/fp16/pad_fp16.h"
#include "src/runtime/kernel/arm/fp16/common_fp16.h"
#include "nnacl/fp16/cast_fp16.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Pad;

namespace mindspore::kernel {
int PadFp16CPUKernel::RunImpl(int task_id) {
  // Use the fp16 buffers prepared in Run(); reading the tensors directly here
  // would bypass the fp32 -> fp16 conversion when the model I/O is fp32.
  PadFp16(input_, output_, in_, out_, pad_param_->paddings_, task_id, context_->thread_num_);
  return RET_OK;
}

int PadFp16CPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed! ret: " << ret;
    return ret;
  }
  auto input_tensor = in_tensors_.at(0);
  auto output_tensor = out_tensors_.at(0);
  is_input_fp32_ = input_tensor->data_type() == kNumberTypeFloat32;
  is_output_fp32_ = output_tensor->data_type() == kNumberTypeFloat32;

  input_ = ConvertInputFp32toFp16(input_tensor, context_);
  output_ = MallocOutputFp16(output_tensor, context_);
  if (input_ == nullptr || output_ == nullptr) {
    FreeInputAndOutput();
    MS_LOG(ERROR) << "input or output is nullptr";
    return RET_ERROR;
  }

  // PadFp16 writes only the interior region, so zero-fill the padding first.
  memset(output_, 0, output_tensor->ElementsNum() * sizeof(float16_t));
  ret = ParallelLaunch(THREAD_POOL_DEFAULT, PadImpl, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PadImpl run error, error_code[" << ret << "]";
  }
  if (is_output_fp32_) {
    Float16ToFloat32(output_, reinterpret_cast<float *>(output_tensor->MutableData()), output_tensor->ElementsNum());
  }
  FreeInputAndOutput();
  return ret;
}

void PadFp16CPUKernel::FreeInputAndOutput() {
  // Only the temporary fp16 buffers allocated for fp32 I/O are owned here;
  // in the pure-fp16 case the pointers alias the tensors' own data.
  if (is_input_fp32_) {
    context_->allocator->Free(input_);
    input_ = nullptr;
  }
  if (is_output_fp32_) {
    context_->allocator->Free(output_);
    output_ = nullptr;
  }
}

kernel::LiteKernel *CpuPadFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                            const std::vector<lite::Tensor *> &outputs,
                                            OpParameter *opParameter, const lite::Context *ctx,
                                            const kernel::KernelKey &desc,
                                            const mindspore::lite::PrimitiveC *primitive) {
  auto *kernel = new (std::nothrow) PadFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new PadFp16CPUKernel failed!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Pad, CpuPadFp16KernelCreator)
}  // namespace mindspore::kernel
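Run() fans RunImpl out over op_parameter_->thread_num_ workers via ParallelLaunch (PadImpl is the trampoline already defined for the fp32 pad kernel). Each worker takes every thread_num-th row of dimension 1, so the split looks like the following stand-alone sketch (names here are illustrative, not MindSpore APIs):

// Stand-alone illustration of the round-robin row split PadFp16 uses:
// worker tid copies rows tid, tid + thread_num, tid + 2 * thread_num, ...
#include <stdio.h>

static void RowsForThread(int rows, int tid, int thread_num) {
  printf("thread %d:", tid);
  for (int r = tid; r < rows; r += thread_num) {
    printf(" %d", r);
  }
  printf("\n");
}

int main(void) {
  const int rows = 8, thread_num = 3;
  for (int tid = 0; tid < thread_num; ++tid) {
    RowsForThread(rows, tid, thread_num);  // prints 0 3 6 / 1 4 7 / 2 5
  }
  return 0;
}

This striping needs no synchronization between workers, since each padded output row is written by exactly one thread.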
src/runtime/kernel/arm/fp16/pad_fp16.h
@ -0,0 +1,45 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_PAD_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_PAD_FP16_H_

#include <vector>
#include "src/runtime/kernel/arm/fp32/pad.h"
#include "nnacl/fp16/pad_fp16.h"

namespace mindspore::kernel {
class PadFp16CPUKernel : public PadCPUKernel {
 public:
  PadFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
      : PadCPUKernel(parameter, inputs, outputs, ctx, primitive) {}

  ~PadFp16CPUKernel() {}

  int Run() override;
  int RunImpl(int task_id) override;

 private:
  void FreeInputAndOutput();
  bool is_input_fp32_ = false;
  bool is_output_fp32_ = false;
  float16_t *input_ = nullptr;
  float16_t *output_ = nullptr;
};
}  // namespace mindspore::kernel

#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_PAD_FP16_H_
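The kernel itself computes purely in fp16; when the surrounding graph is fp32, Run() converts on the way in (ConvertInputFp32toFp16) and on the way out (Float16ToFloat32). Below is a minimal stand-alone sketch of that boundary with stand-in conversion helpers (the real ones live in nnacl/fp16/cast_fp16.h), again assuming an aarch64 toolchain:

// Sketch of the fp32 <-> fp16 boundary handled by PadFp16CPUKernel::Run().
// Float32ToFloat16Sketch / Float16ToFloat32Sketch are stand-ins for the
// nnacl cast helpers, not their actual implementations.
#include <arm_neon.h>
#include <stdio.h>

static void Float32ToFloat16Sketch(const float *in, float16_t *out, int n) {
  for (int i = 0; i < n; ++i) out[i] = (float16_t)in[i];
}

static void Float16ToFloat32Sketch(const float16_t *in, float *out, int n) {
  for (int i = 0; i < n; ++i) out[i] = (float)in[i];
}

int main(void) {
  float src[3] = {1.5f, -2.25f, 0.1f};
  float16_t half[3];
  float dst[3];
  Float32ToFloat16Sketch(src, half, 3);  // convert-in, like ConvertInputFp32toFp16
  Float16ToFloat32Sketch(half, dst, 3);  // convert-out, like Float16ToFloat32
  for (int i = 0; i < 3; ++i) {
    printf("%f -> %f\n", src[i], dst[i]);  // 0.1 shows fp16 rounding loss
  }
  return 0;
}

Note that is_input_fp32_ / is_output_fp32_ gate both the conversions and the later frees in FreeInputAndOutput(), so pure-fp16 graphs incur no extra copies.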