commit
c7f461acc7
@ -0,0 +1,130 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
#include "nnacl/fp16/concat_fp16.h"
|
||||
#include "src/runtime/kernel/arm/fp16/concat_fp16.h"
|
||||
#include "src/kernel_registry.h"
|
||||
#include "schema/model_generated.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "nnacl/fp16/cast_fp16.h"
|
||||
|
||||
using mindspore::kernel::KERNEL_ARCH::kCPU;
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Concat;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int ConcatFp16CPUKernel::Init() {
|
||||
auto ret = ConcatBaseCPUKernel::Init();
|
||||
if (ret != RET_OK) {
|
||||
return ret;
|
||||
}
|
||||
if (!InferShapeDone()) {
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
return ReSize();
|
||||
}
|
||||
|
||||
int ConcatFp16CPUKernel::ReSize() {
|
||||
for (auto ptr : fp16_inputs_) {
|
||||
if (ptr != nullptr) {
|
||||
free(ptr);
|
||||
ptr = nullptr;
|
||||
}
|
||||
}
|
||||
fp16_inputs_.clear();
|
||||
for (size_t i = 0; i < in_tensors_.size(); ++i) {
|
||||
float16_t *ptr = nullptr;
|
||||
ptr = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * in_tensors_[i]->ElementsNum()));
|
||||
if (ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
fp16_inputs_.push_back(ptr);
|
||||
}
|
||||
|
||||
if (fp16_output_ != nullptr) {
|
||||
free(fp16_output_);
|
||||
fp16_output_ = nullptr;
|
||||
}
|
||||
fp16_output_ = reinterpret_cast<float16_t *>(malloc(sizeof(float16_t) * out_tensors_[0]->ElementsNum()));
|
||||
if (fp16_output_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return ConcatBaseCPUKernel::ReSize();
|
||||
}
|
||||
|
||||
int ConcatFp16CPUKernel::Run() {
|
||||
auto prepare_ret = Prepare();
|
||||
if (prepare_ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
|
||||
return prepare_ret;
|
||||
}
|
||||
auto input_num = in_tensors_.size();
|
||||
std::vector<float *> inputs_addr(input_num, nullptr);
|
||||
std::vector<int *> inputs_output_shape(input_num + 1, nullptr);
|
||||
|
||||
std::vector<std::vector<int>> shapes;
|
||||
for (size_t i = 0; i < input_num; ++i) {
|
||||
inputs_addr[i] = reinterpret_cast<float *>(in_tensors_[i]->Data());
|
||||
if (inputs_addr[i] == nullptr) {
|
||||
MS_LOG(ERROR) << "got nullptr when cast in_tensor to float ptr";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
Float32ToFloat16(inputs_addr[i], fp16_inputs_[i], in_tensors_[i]->ElementsNum());
|
||||
shapes.push_back(in_tensors_[i]->shape());
|
||||
inputs_output_shape[i] = shapes[i].data();
|
||||
}
|
||||
auto output_shape = out_tensors_.at(0)->shape();
|
||||
inputs_output_shape[input_num] = output_shape.data();
|
||||
auto output_addr = out_tensors_.at(0)->Data();
|
||||
|
||||
ConcatFp16(reinterpret_cast<void **>(fp16_inputs_.data()), input_num, axis_, inputs_output_shape.data(),
|
||||
output_shape.size(), reinterpret_cast<void *>(fp16_output_));
|
||||
Float16ToFloat32(fp16_output_, reinterpret_cast<float *>(output_addr), out_tensors_.at(0)->ElementsNum());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
kernel::LiteKernel *CpuConcatFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
|
||||
const std::vector<lite::tensor::Tensor *> &outputs,
|
||||
OpParameter *opParameter, const Context *ctx,
|
||||
const kernel::KernelKey &desc, const lite::Primitive *primitive) {
|
||||
if (opParameter == nullptr) {
|
||||
MS_LOG(ERROR) << "Input opParameter is nullptr!";
|
||||
return nullptr;
|
||||
}
|
||||
MS_ASSERT(desc.type == schema::PrimitiveType_Concat);
|
||||
auto *kernel = new (std::nothrow) ConcatFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
|
||||
if (kernel == nullptr) {
|
||||
MS_LOG(ERROR) << "new ConcatCPUKernel fail!";
|
||||
return nullptr;
|
||||
}
|
||||
auto ret = kernel->Init();
|
||||
if (ret != RET_OK) {
|
||||
delete kernel;
|
||||
MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
|
||||
<< schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Concat, CpuConcatFp16KernelCreator)
|
||||
} // namespace mindspore::kernel
|
@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <vector>
|
||||
#include "src/lite_kernel.h"
|
||||
#include "include/context.h"
|
||||
#include "src/runtime/kernel/arm/base/concat_base.h"
|
||||
|
||||
using mindspore::lite::Context;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class ConcatFp16CPUKernel : public ConcatBaseCPUKernel {
|
||||
public:
|
||||
ConcatFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
|
||||
const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
|
||||
const lite::Primitive *primitive)
|
||||
: ConcatBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||
|
||||
~ConcatFp16CPUKernel() {
|
||||
for (auto ptr : fp16_inputs_) {
|
||||
if (ptr != nullptr) {
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int Init() override;
|
||||
|
||||
int ReSize() override;
|
||||
|
||||
int Run() override;
|
||||
|
||||
private:
|
||||
std::vector<float16_t *> fp16_inputs_;
|
||||
float16_t *fp16_output_ = nullptr;
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_CONCAT_FP16_H_
|
@ -0,0 +1,43 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp16/concat_fp16.h"
|
||||
#include <string.h>
|
||||
|
||||
// Concatenates input_num fp16 tensors along `axis` into `output`.
// inputs_output_shape holds the input_num input shapes followed by the output
// shape; shape_size is the rank shared by all of them.  All tensors are
// assumed to agree on every dimension except `axis`.
void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output) {
  const int *first_shape = inputs_output_shape[0];
  int outer_size = 1;
  for (int d = 0; d < axis; ++d) {
    outer_size *= first_shape[d];
  }
  // Inner span in bytes: product of the trailing dims times sizeof(float16) == 2.
  int inner_bytes = 2;
  for (size_t d = axis + 1; d < shape_size; ++d) {
    inner_bytes *= first_shape[d];
  }
  uint8_t *dst_base = (uint8_t *)output;
  size_t dst_stride = inner_bytes * inputs_output_shape[input_num][axis];
  int axis_offset = 0;
  for (int i = 0; i < input_num; ++i) {
    uint8_t *src_base = (uint8_t *)input[i];
    size_t src_stride = inner_bytes * inputs_output_shape[i][axis];
    for (int j = 0; j < outer_size; ++j) {
      // Each outer slice of input i lands at its running offset along the axis.
      memcpy(dst_base + j * dst_stride + axis_offset * inner_bytes, src_base + j * src_stride, src_stride);
    }
    axis_offset += inputs_output_shape[i][axis];
  }
}
|
@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_
|
||||
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
void ConcatFp16(void **input, int input_num, int axis, int **inputs_output_shape, size_t shape_size, void *output);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_CONCAT_FP16_H_
|
Loading…
Reference in new issue