parent
dde257592b
commit
9734f2a88e
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "nnacl/fp16/batchnorm_fp16.h"
|
||||
#include <math.h>
|
||||
|
||||
void BatchNormFp16(const void *input, const void *mean, const void *variance,
|
||||
BatchNormParameter *param, int task_id, void *output) {
|
||||
int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
|
||||
int completed_units = task_id * units_per_thread;
|
||||
int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
|
||||
int cur_offset = completed_units * param->channel_;
|
||||
|
||||
for (int i = 0; i < cur_unit; i++) {
|
||||
for (int c = 0; c < param->channel_; c++) {
|
||||
float16_t variance_sqrt = sqrt(((const float16_t *)variance)[c] + param->epsilon_);
|
||||
((float16_t *)output)[cur_offset + c] =
|
||||
(((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt;
|
||||
}
|
||||
cur_offset += param->channel_;
|
||||
}
|
||||
}
|
||||
|
||||
// Fused batch normalization over fp16 data:
//   out = scale * (x - mean) / sqrt(variance + epsilon) + offset.
// Threading model matches BatchNormFp16: each call handles the `task_id` slice
// of `unit_` rows. All data pointers reference float16_t buffers; `scale`,
// `offset`, `mean`, `variance` hold one value per channel.
void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean,
                        const void *variance, BatchNormParameter *param, int task_id, void *output) {
  int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_);
  int completed_units = task_id * units_per_thread;
  int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units);
  int cur_offset = completed_units * param->channel_;
  // Hoist the pointer casts out of the inner loop for readability.
  const float16_t *input_ptr = (const float16_t *)input;
  const float16_t *scale_ptr = (const float16_t *)scale;
  const float16_t *offset_ptr = (const float16_t *)offset;
  const float16_t *mean_ptr = (const float16_t *)mean;
  const float16_t *variance_ptr = (const float16_t *)variance;
  float16_t *output_ptr = (float16_t *)output;

  for (int i = 0; i < cur_unit; i++) {
    for (int c = 0; c < param->channel_; c++) {
      // sqrtf keeps the computation in single precision; plain sqrt would
      // silently promote the fp16 operand through double and back.
      float16_t variance_sqrt = sqrtf(variance_ptr[c] + param->epsilon_);
      float16_t norm_val = (input_ptr[cur_offset + c] - mean_ptr[c]) / variance_sqrt;
      output_ptr[cur_offset + c] = norm_val * scale_ptr[c] + offset_ptr[c];
    }
    cur_offset += param->channel_;
  }
}
|
@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_
|
||||
|
||||
#ifdef ENABLE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#include "nnacl/batchnorm_parameter.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void BatchNormFp16(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id,
|
||||
void *output);
|
||||
void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean,
|
||||
const void *variance, BatchNormParameter *param, int task_id, void *output);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_
|
@ -0,0 +1,87 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp16/batchnorm_fp16.h"
|
||||
#include "nnacl/fp16/batchnorm_fp16.h"
|
||||
#include "nnacl/fp16/cast_fp16.h"
|
||||
#include "src/kernel_registry.h"
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::schema::PrimitiveType_BatchNorm;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int BatchnormFp16CPUKernel::DoExecute(int task_id) {
|
||||
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
|
||||
|
||||
if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
|
||||
auto input = in_tensors_.at(0);
|
||||
auto mean = in_tensors_.at(1);
|
||||
auto variance = in_tensors_.at(2);
|
||||
auto output = out_tensors_.at(0);
|
||||
|
||||
auto input_fp16 = context_->allocator->Malloc(input->ElementsNum() * sizeof(float16_t));
|
||||
auto mean_fp16 = context_->allocator->Malloc(mean->ElementsNum() * sizeof(float16_t));
|
||||
auto variance_fp16 = context_->allocator->Malloc(variance->ElementsNum() * sizeof(float16_t));
|
||||
auto output_fp16 = context_->allocator->Malloc(output->ElementsNum() * sizeof(float16_t));
|
||||
if (input_fp16 == nullptr || mean_fp16 == nullptr || variance_fp16 == nullptr || output_fp16 == nullptr) {
|
||||
context_->allocator->Free(input_fp16);
|
||||
context_->allocator->Free(mean_fp16);
|
||||
context_->allocator->Free(variance_fp16);
|
||||
context_->allocator->Free(output_fp16);
|
||||
}
|
||||
Float32ToFloat16(reinterpret_cast<float *>(input->Data()),
|
||||
reinterpret_cast<float16_t *>(input_fp16), input->ElementsNum());
|
||||
Float32ToFloat16(reinterpret_cast<float *>(mean->Data()),
|
||||
reinterpret_cast<float16_t *>(mean_fp16), mean->ElementsNum());
|
||||
Float32ToFloat16(reinterpret_cast<float *>(variance->Data()),
|
||||
reinterpret_cast<float16_t *>(variance_fp16), variance->ElementsNum());
|
||||
|
||||
BatchNormFp16(input_fp16, mean_fp16, variance_fp16, param, task_id, output_fp16);
|
||||
|
||||
Float16ToFloat32(reinterpret_cast<float16_t *>(output_fp16), reinterpret_cast<float *>(output),
|
||||
output->ElementsNum());
|
||||
context_->allocator->Free(input_fp16);
|
||||
context_->allocator->Free(mean_fp16);
|
||||
context_->allocator->Free(variance_fp16);
|
||||
context_->allocator->Free(output_fp16);
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
BatchNormFp16(in_tensors_.at(0)->Data(), mean_, variance_, param, task_id, out_tensors_.at(0)->Data());
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
|
||||
// Factory for the fp16 batch-norm CPU kernel. Allocates the kernel, runs
// Init(), and returns it; returns nullptr (after cleanup) on any failure.
kernel::LiteKernel *CpuBatchnormFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                  const std::vector<lite::tensor::Tensor *> &outputs,
                                                  OpParameter *opParameter, const lite::Context *ctx,
                                                  const kernel::KernelKey &desc,
                                                  const mindspore::lite::PrimitiveC *primitive) {
  // Guard against a null parameter: it is dereferenced both by the kernel
  // constructor and by the error-log path below.
  if (opParameter == nullptr) {
    MS_LOG(ERROR) << "opParameter is nullptr";
    return nullptr;
  }
  auto *kernel = new (std::nothrow) BatchnormFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new BatchnormFp16CPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}
|
||||
|
||||
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_BatchNorm, CpuBatchnormFp16KernelCreator)
|
||||
} // namespace mindspore::kernel
|
@ -0,0 +1,36 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/runtime/kernel/arm/fp32/batchnorm.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class BatchnormFp16CPUKernel : public BatchnormCPUKernel {
|
||||
public:
|
||||
BatchnormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
|
||||
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||
virtual ~BatchnormFp16CPUKernel() {}
|
||||
|
||||
virtual int DoExecute(int task_id);
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_
|
@ -0,0 +1,103 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h"
|
||||
#include "nnacl/fp16/batchnorm_fp16.h"
|
||||
#include "nnacl/fp16/cast_fp16.h"
|
||||
#include "src/kernel_registry.h"
|
||||
|
||||
using mindspore::lite::KernelRegistrar;
|
||||
using mindspore::schema::PrimitiveType_FusedBatchNorm;
|
||||
|
||||
namespace mindspore::kernel {
|
||||
int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
|
||||
auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
|
||||
|
||||
if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
|
||||
auto input = in_tensors_.at(0);
|
||||
auto scale = in_tensors_.at(1);
|
||||
auto offset = in_tensors_.at(2);
|
||||
auto mean = in_tensors_.at(3);
|
||||
auto variance = in_tensors_.at(4);
|
||||
auto output = out_tensors_.at(0);
|
||||
|
||||
auto input_fp16 = context_->allocator->Malloc(input->ElementsNum() * sizeof(float16_t));
|
||||
auto scale_fp16 = context_->allocator->Malloc(scale->ElementsNum() * sizeof(float16_t));
|
||||
auto offset_fp16 = context_->allocator->Malloc(offset->ElementsNum() * sizeof(float16_t));
|
||||
auto mean_fp16 = context_->allocator->Malloc(mean->ElementsNum() * sizeof(float16_t));
|
||||
auto variance_fp16 = context_->allocator->Malloc(variance->ElementsNum() * sizeof(float16_t));
|
||||
auto output_fp16 = context_->allocator->Malloc(output->ElementsNum() * sizeof(float16_t));
|
||||
if (input_fp16 == nullptr || scale_fp16 == nullptr || offset_fp16 == nullptr ||
|
||||
mean_fp16 == nullptr || variance_fp16 == nullptr || output_fp16 == nullptr) {
|
||||
context_->allocator->Free(input_fp16);
|
||||
context_->allocator->Free(scale_fp16);
|
||||
context_->allocator->Free(offset_fp16);
|
||||
context_->allocator->Free(mean_fp16);
|
||||
context_->allocator->Free(variance_fp16);
|
||||
context_->allocator->Free(output_fp16);
|
||||
}
|
||||
Float32ToFloat16(reinterpret_cast<float *>(input->Data()),
|
||||
reinterpret_cast<float16_t *>(input_fp16), input->ElementsNum());
|
||||
Float32ToFloat16(reinterpret_cast<float *>(scale->Data()),
|
||||
reinterpret_cast<float16_t *>(scale_fp16), scale->ElementsNum());
|
||||
Float32ToFloat16(reinterpret_cast<float *>(offset->Data()),
|
||||
reinterpret_cast<float16_t *>(offset_fp16), offset->ElementsNum());
|
||||
Float32ToFloat16(reinterpret_cast<float *>(mean->Data()),
|
||||
reinterpret_cast<float16_t *>(mean_fp16), mean->ElementsNum());
|
||||
Float32ToFloat16(reinterpret_cast<float *>(variance->Data()),
|
||||
reinterpret_cast<float16_t *>(variance_fp16), variance->ElementsNum());
|
||||
|
||||
FusedBatchNormFp16(input_fp16, scale_fp16, offset_fp16, mean_fp16, variance_fp16, param, task_id,
|
||||
output_fp16);
|
||||
|
||||
Float16ToFloat32(reinterpret_cast<float16_t *>(output_fp16), reinterpret_cast<float *>(output),
|
||||
output->ElementsNum());
|
||||
context_->allocator->Free(input_fp16);
|
||||
context_->allocator->Free(scale_fp16);
|
||||
context_->allocator->Free(offset_fp16);
|
||||
context_->allocator->Free(mean_fp16);
|
||||
context_->allocator->Free(variance_fp16);
|
||||
context_->allocator->Free(output_fp16);
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
FusedBatchNormFp16(in_tensors_.at(0)->Data(), scale_, offset_, mean_, variance_, param, task_id,
|
||||
out_tensors_.at(0)->Data());
|
||||
return mindspore::lite::RET_OK;
|
||||
}
|
||||
|
||||
// Factory for the fused fp16 batch-norm CPU kernel. Allocates the kernel, runs
// Init(), and returns it; returns nullptr (after cleanup) on any failure.
kernel::LiteKernel *CpuFusedBatchnormFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
                                                       const std::vector<lite::tensor::Tensor *> &outputs,
                                                       OpParameter *op_parameter, const lite::Context *ctx,
                                                       const kernel::KernelKey &desc,
                                                       const mindspore::lite::PrimitiveC *primitive) {
  // Guard against a null parameter: it is dereferenced both by the kernel
  // constructor and by the error-log path below.
  if (op_parameter == nullptr) {
    MS_LOG(ERROR) << "op_parameter is nullptr";
    return nullptr;
  }
  FusedBatchnormFp16CPUKernel *kernel =
    new (std::nothrow) FusedBatchnormFp16CPUKernel(op_parameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new FusedBatchnormFp16CPUKernel fail!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    delete kernel;
    MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_));
    return nullptr;
  }
  return kernel;
}
|
||||
|
||||
// REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FusedBatchNorm, CpuFusedBatchnormFp16KernelCreator)
|
||||
} // namespace mindspore::kernel
|
@ -0,0 +1,36 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_
|
||||
|
||||
#include <vector>
|
||||
#include "src/runtime/kernel/arm/fp32/fused_batchnorm.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
class FusedBatchnormFp16CPUKernel : public FusedBatchnormCPUKernel {
|
||||
public:
|
||||
FusedBatchnormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
|
||||
const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
|
||||
const mindspore::lite::PrimitiveC *primitive)
|
||||
: FusedBatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
|
||||
virtual ~FusedBatchnormFp16CPUKernel() {}
|
||||
|
||||
virtual int DoExecute(int task_id);
|
||||
};
|
||||
} // namespace mindspore::kernel
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_
|
Loading…
Reference in new issue