|
|
@ -16,7 +16,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
#include "nnacl/fp16/scale_fp16.h"
|
|
|
|
#include "nnacl/fp16/scale_fp16.h"
|
|
|
|
|
|
|
|
|
|
|
|
void ScaleInner(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
void Fp16ScaleInner(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
int outer_end, int axis_size, int inner_size) {
|
|
|
|
int outer_end, int axis_size, int inner_size) {
|
|
|
|
for (int out = outer_start; out < outer_end; out++) {
|
|
|
|
for (int out = outer_start; out < outer_end; out++) {
|
|
|
|
int out_offset = out * axis_size * inner_size;
|
|
|
|
int out_offset = out * axis_size * inner_size;
|
|
|
@ -42,7 +42,7 @@ void ScaleInner(float16_t *in_data, float16_t *out_data, float16_t *scale, float
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void ScaleAxis(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
void Fp16ScaleAxis(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
int outer_end, int axis_size) {
|
|
|
|
int outer_end, int axis_size) {
|
|
|
|
for (int out = outer_start; out < outer_end; out++) {
|
|
|
|
for (int out = outer_start; out < outer_end; out++) {
|
|
|
|
int out_offset = out * axis_size;
|
|
|
|
int out_offset = out * axis_size;
|
|
|
@ -71,14 +71,14 @@ void DoScaleFp16(float16_t *in_data, float16_t *out_data, float16_t *scale, floa
|
|
|
|
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
|
|
|
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
|
|
|
|
|
|
|
|
|
|
|
if (scale_param->inner_size_ == 1) {
|
|
|
|
if (scale_param->inner_size_ == 1) {
|
|
|
|
ScaleAxis(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
|
|
|
|
Fp16ScaleAxis(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
ScaleInner(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
|
|
|
|
Fp16ScaleInner(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
|
|
|
|
scale_param->inner_size_);
|
|
|
|
scale_param->inner_size_);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void ScaleInnerRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
void Fp16ScaleInnerRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
int outer_end, int axis_size, int inner_size) {
|
|
|
|
int outer_end, int axis_size, int inner_size) {
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
@ -108,7 +108,7 @@ void ScaleInnerRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, f
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void ScaleAxisRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
void Fp16ScaleAxisRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
int outer_end, int axis_size) {
|
|
|
|
int outer_end, int axis_size) {
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
@ -135,21 +135,21 @@ void ScaleAxisRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, fl
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void DoScaleReluFp16(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
|
|
|
|
void Fp16DoScaleRelu(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int task_id,
|
|
|
|
ScaleParameter *scale_param) {
|
|
|
|
ScaleParameter *scale_param) {
|
|
|
|
int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
|
|
|
|
int outer_step = UP_DIV(scale_param->outer_size_, scale_param->op_parameter_.thread_num_);
|
|
|
|
int outer_start = task_id * outer_step;
|
|
|
|
int outer_start = task_id * outer_step;
|
|
|
|
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
|
|
|
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
|
|
|
|
|
|
|
|
|
|
|
if (scale_param->inner_size_ == 1) {
|
|
|
|
if (scale_param->inner_size_ == 1) {
|
|
|
|
ScaleAxisRelu(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
|
|
|
|
Fp16ScaleAxisRelu(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
ScaleInnerRelu(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
|
|
|
|
Fp16ScaleInnerRelu(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
|
|
|
|
scale_param->inner_size_);
|
|
|
|
scale_param->inner_size_);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void ScaleInnerRelu6(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
void Fp16ScaleInnerRelu6(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
int outer_end, int axis_size, int inner_size) {
|
|
|
|
int outer_end, int axis_size, int inner_size) {
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
@ -180,7 +180,7 @@ void ScaleInnerRelu6(float16_t *in_data, float16_t *out_data, float16_t *scale,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void ScaleAxisRelu6(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
void Fp16ScaleAxisRelu6(float16_t *in_data, float16_t *out_data, float16_t *scale, float16_t *offset, int outer_start,
|
|
|
|
int outer_end, int axis_size) {
|
|
|
|
int outer_end, int axis_size) {
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
#ifdef ENABLE_ARM64
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
@ -215,9 +215,9 @@ void DoScaleRelu6Fp16(float16_t *in_data, float16_t *out_data, float16_t *scale,
|
|
|
|
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
|
|
|
int outer_end = MSMIN(outer_start + outer_step, scale_param->outer_size_);
|
|
|
|
|
|
|
|
|
|
|
|
if (scale_param->inner_size_ == 1) {
|
|
|
|
if (scale_param->inner_size_ == 1) {
|
|
|
|
ScaleAxisRelu6(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
|
|
|
|
Fp16ScaleAxisRelu6(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_);
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
ScaleInnerRelu6(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
|
|
|
|
Fp16ScaleInnerRelu6(in_data, out_data, scale, offset, outer_start, outer_end, scale_param->axis_size_,
|
|
|
|
scale_param->inner_size_);
|
|
|
|
scale_param->inner_size_);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|