|
|
|
@ -19,12 +19,48 @@
|
|
|
|
|
#include "nnacl/errorcode.h"
|
|
|
|
|
|
|
|
|
|
void CalculateCoordinate(float out, int in, int *bottom, int *top, float *bottom_weight) {
|
|
|
|
|
*bottom = (int)(floor(out));
|
|
|
|
|
*bottom = (int)(floorf(out));
|
|
|
|
|
*top = *bottom + 1 < in ? (*bottom + 1) : (in - 1);
|
|
|
|
|
float top_weight = (float)out - (float)(*bottom);
|
|
|
|
|
*bottom_weight = 1.0f - top_weight;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Evaluates the piecewise cubic kernel for a non-negative distance x and
// stores the result in weight[0].
//   a      - cubic coefficient
//   x      - distance to the sample; values outside [0, 2) yield 0
//   weight - output, only weight[0] is written
static void BicubicBaseFunc(float a, const float x, float *weight) {
  float result = 0;
  if (x >= 0 && x <= 1) {
    // Inner lobe: (a + 2) * x^3 - (a + 3) * x^2 + 1
    result = ((a + 2) * x - (a + 3)) * x * x + 1;
  } else if (x > 1 && x < 2) {
    // Outer lobe: a * x^3 - 5a * x^2 + 8a * x - 4a
    result = a * x * x * x - 5 * a * x * x + 8 * a * x - 4 * a;
  }
  weight[0] = result;
}
|
|
|
|
|
|
|
|
|
|
// a is a coefficient
|
|
|
|
|
// W(x) = { (a + 2) * |x| * |x| * |x| - (a + 3) * |x| * |x| + 1, for |x| <= 1
|
|
|
|
|
// { a * |x| * |x| * |x| - 5 * a * |x| * |x| + 8 * a *|x| - 4 * a, for 1 < |x| < 2
|
|
|
|
|
// { 0, otherwise
|
|
|
|
|
// The value of 'a' depends on whether half_pixel_center is used (the scheme is the same as TensorFlow's).
// In half-pixel mode 'a' equals -0.5; otherwise it equals -0.75.
|
|
|
|
|
// Computes the four source indices and the four cubic weights that contribute
// to one output coordinate.
//   out     - coordinate on the source axis
//   in      - length of the source axis
//   bottom  - receives two indices: bottom[1] = floor(out) and
//             bottom[0] = bottom[1] - 1, limited to 0 from below
//   top     - receives two indices: top[0] = bottom[1] + 1 and
//             top[1] = top[0] + 1, both limited to in - 1
//   weights - receives four weights, in the order bottom[0], bottom[1], top[0], top[1]
//   a       - cubic coefficient forwarded to BicubicBaseFunc
void CalculateWightForBicubic(float out, int in, int *bottom, int *top, float *weights, float a) {
  const int last = in - 1;
  const int base = (int)(floorf(out));

  // bottom[1] is stored before bottom[0]; the storage order is fixed by the callers' layout.
  bottom[1] = base;
  int below = base - 1;
  if (below < 0) {
    below = 0;
  }
  bottom[0] = below;

  int above = base + 1;
  if (!(above < in)) {
    above = last;
  }
  top[0] = above;

  int above_next = above + 1;
  if (!(above_next < in)) {
    above_next = last;
  }
  top[1] = above_next;

  // Positive distances from 'out' to the four taps at base - 1, base, base + 1 and base + 2.
  const float frac = out - (float)base;
  const float distance[4] = {1 + frac, frac, 1 - frac, 2 - frac};

  int tap = 0;
  while (tap < 4) {
    BicubicBaseFunc(a, distance[tap], weights + tap);
    ++tap;
  }
}
|
|
|
|
|
|
|
|
|
|
int PrepareResizeBilinear(const int *input_shape, const int *output_shape, CalculateOriginalCoordinate calculate,
|
|
|
|
|
int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_bottom_weights,
|
|
|
|
|
float *x_left_weights) {
|
|
|
|
@ -50,6 +86,30 @@ int PrepareResizeBilinear(const int *input_shape, const int *output_shape, Calcu
|
|
|
|
|
return NNACL_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Precomputes, for every output row and column, the four source indices and
// the four cubic weights used by the bicubic resize.
//   input_shape  - input_shape[1] / input_shape[2] are read as source height / width
//   output_shape - output_shape[1] / output_shape[2] are read as target height / width
//   calculate    - maps an output index to a source-space coordinate
//   y_bottoms, y_tops - two indices per output row each (2 * new_height ints)
//   x_lefts, x_rights - two indices per output column each (2 * new_width ints)
//   y_weights    - four weights per output row (4 * new_height floats)
//   x_weights    - four weights per output column (4 * new_width floats)
//   cubic_coeff  - coefficient 'a' of the cubic kernel
// Returns NNACL_NULL_PTR if any pointer argument (including the coordinate
// callback) is NULL, NNACL_OK otherwise.
int PrepareResizeBicubic(const int *input_shape, const int *output_shape, CalculateOriginalCoordinate calculate,
                         int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights, float *y_weights, float *x_weights,
                         float cubic_coeff) {
  // 'calculate' is invoked unconditionally below, so it is guarded like every other pointer.
  if (input_shape == NULL || output_shape == NULL || calculate == NULL || y_bottoms == NULL || y_tops == NULL ||
      x_lefts == NULL || x_rights == NULL || y_weights == NULL || x_weights == NULL) {
    return NNACL_NULL_PTR;
  }

  int in_h = input_shape[1];
  int in_w = input_shape[2];
  int new_height = output_shape[1];
  int new_width = output_shape[2];

  // Vertical taps: entry h occupies slots [2h, 2h + 1] of the index arrays and [4h, 4h + 3] of the weights.
  for (int h = 0; h < new_height; h++) {
    float actual_y = calculate(h, in_h, new_height);
    CalculateWightForBicubic(actual_y, in_h, y_bottoms + 2 * h, y_tops + 2 * h, y_weights + 4 * h, cubic_coeff);
  }
  // Horizontal taps use the same layout, indexed by output column.
  for (int w = 0; w < new_width; w++) {
    float actual_x = calculate(w, in_w, new_width);
    CalculateWightForBicubic(actual_x, in_w, x_lefts + 2 * w, x_rights + 2 * w, x_weights + 4 * w, cubic_coeff);
  }
  return NNACL_OK;
}
|
|
|
|
|
|
|
|
|
|
int PrepareCropAndResizeBilinear(const int *input_shape, const float *boxes, const int *box_idx,
|
|
|
|
|
const int *output_shape, int *y_bottoms, int *y_tops, int *x_lefts, int *x_rights,
|
|
|
|
|
float *y_bottom_weights, float *x_left_weights) {
|
|
|
|
@ -222,6 +282,126 @@ int ResizeBilinear(const float *input_data, float *output_data, const int *input
|
|
|
|
|
return NNACL_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Horizontal pass: for each of 'len' output pixels, blends four source pixels
// of one source row with that pixel's four weights, channel by channel.
//   src     - start of the source row, pixels of in_c interleaved channels
//   dst     - output row, len * in_c floats
//   len     - number of output pixels
//   weights - four weights per output pixel (weights[4 * l .. 4 * l + 3])
//   lefts   - two source pixel indices per output pixel (first and second tap)
//   rights  - two source pixel indices per output pixel (third and fourth tap)
//   in_c    - channels per pixel
void BicubicInterpRow(const float *src, float *dst, int len, const float *weights, const int *lefts, const int *rights,
                      int in_c) {
  for (int l = 0; l < len; ++l) {
    const float w_a = weights[4 * l];
    const float w_b = weights[4 * l + 1];
    const float w_c = weights[4 * l + 2];
    const float w_d = weights[4 * l + 3];

    // Element offsets of the four taps in the source row and of the output pixel.
    const int off_a = lefts[2 * l] * in_c;
    const int off_b = lefts[2 * l + 1] * in_c;
    const int off_c = rights[2 * l] * in_c;
    const int off_d = rights[2 * l + 1] * in_c;
    const int off_out = l * in_c;

    int c = 0;
#ifdef ENABLE_NEON
    // Four channels at a time; leftovers fall through to the scalar loop below.
    const float32x4_t w_a_vec = vdupq_n_f32(w_a);
    const float32x4_t w_b_vec = vdupq_n_f32(w_b);
    const float32x4_t w_c_vec = vdupq_n_f32(w_c);
    const float32x4_t w_d_vec = vdupq_n_f32(w_d);
    while (c <= in_c - 4) {
      const float32x4_t tap_a = vld1q_f32(src + off_a + c);
      const float32x4_t tap_b = vld1q_f32(src + off_b + c);
      const float32x4_t tap_c = vld1q_f32(src + off_c + c);
      const float32x4_t tap_d = vld1q_f32(src + off_d + c);
      const float32x4_t blended = tap_a * w_a_vec + tap_b * w_b_vec + tap_c * w_c_vec + tap_d * w_d_vec;
      vst1q_f32(dst + off_out + c, blended);
      c += 4;
    }
#endif
    while (c < in_c) {
      dst[off_out + c] = src[off_a + c] * w_a + src[off_b + c] * w_b + src[off_c + c] * w_c + src[off_d + c] * w_d;
      ++c;
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// Blends four equally laid out buffers element-wise into dst. Element l
// (in_c consecutive floats) uses the four weights at weights[4 * l .. 4 * l + 3].
//   src1..src4 - the four input buffers, len * in_c floats each
//   dst        - output buffer, len * in_c floats
//   len        - number of elements
//   weights    - four weights per element
//   in_c       - floats per element
void BicubicInterpCol(const float *src1, const float *src2, const float *src3, const float *src4, float *dst, int len,
                      const float *weights, int in_c) {
  for (int l = 0; l < len; ++l) {
    const int base = l * in_c;
    const float w_a = weights[4 * l];
    const float w_b = weights[4 * l + 1];
    const float w_c = weights[4 * l + 2];
    const float w_d = weights[4 * l + 3];

    int c = 0;
#ifdef ENABLE_NEON
    // Four floats at a time; leftovers fall through to the scalar loop below.
    const float32x4_t w_a_vec = vdupq_n_f32(w_a);
    const float32x4_t w_b_vec = vdupq_n_f32(w_b);
    const float32x4_t w_c_vec = vdupq_n_f32(w_c);
    const float32x4_t w_d_vec = vdupq_n_f32(w_d);
    while (c <= in_c - 4) {
      const float32x4_t line_a = vld1q_f32(src1 + base + c);
      const float32x4_t line_b = vld1q_f32(src2 + base + c);
      const float32x4_t line_c = vld1q_f32(src3 + base + c);
      const float32x4_t line_d = vld1q_f32(src4 + base + c);
      const float32x4_t blended = line_a * w_a_vec + line_b * w_b_vec + line_c * w_c_vec + line_d * w_d_vec;
      vst1q_f32(dst + base + c, blended);
      c += 4;
    }
#endif
    while (c < in_c) {
      const int idx = base + c;
      dst[idx] = src1[idx] * w_a + src2[idx] * w_b + src3[idx] * w_c + src4[idx] * w_d;
      ++c;
    }
  }
}
|
|
|
|
|
|
|
|
|
|
// Bicubic resize of output rows [h_begin, h_end) of a single image.
// Each output row is produced in two passes: four source rows are resampled
// horizontally into line_buffer, then blended vertically into the output row.
//   input_data   - source image; input_shape[2] / input_shape[3] are read as width / channels
//   output_data  - target image; output_shape[2] is read as the target width
//   y_bottom, y_top   - two source row indices per output row each
//   x_lefts, x_rights - two source column indices per output column each
//   y_weights    - four vertical weights per output row (y_weights[4 * h .. 4 * h + 3])
//   x_weights    - four horizontal weights per output column
//   line_buffer  - scratch space for four resampled rows, 4 * new_width * in_c floats
//   h_begin, h_end - half-open range of output rows to produce
void Bicubic(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
             const int *y_bottom, const int *y_top, const int *x_lefts, const int *x_rights, const float *y_weights,
             const float *x_weights, float *line_buffer, const int h_begin, const int h_end) {
  int in_w = input_shape[2];
  int in_c = input_shape[3];
  int new_width = output_shape[2];
  int h_stride = new_width * in_c;

  float *line_array[4] = {line_buffer, line_buffer + h_stride, line_buffer + 2 * h_stride, line_buffer + 3 * h_stride};
  for (int h = h_begin; h < h_end; h++) {
    // Horizontal pass over the two lower and the two upper source rows.
    for (int i = 0; i < 2; ++i) {
      BicubicInterpRow(input_data + y_bottom[2 * h + i] * in_w * in_c, line_array[i], new_width, x_weights, x_lefts,
                       x_rights, in_c);
    }
    for (int j = 0; j < 2; ++j) {
      BicubicInterpRow(input_data + y_top[2 * h + j] * in_w * in_c, line_array[j + 2], new_width, x_weights, x_lefts,
                       x_rights, in_c);
    }

    // Vertical pass. All pixels of output row h share the four weights stored at y_weights + 4 * h.
    // BicubicInterpCol selects weights per element (weights[4 * l]), so the whole row is handed over
    // as a single element of h_stride floats; passing the full table with len = new_width would
    // pick the weights by column index instead of by row.
    BicubicInterpCol(line_array[0], line_array[1], line_array[2], line_array[3], output_data + h * h_stride, 1,
                     y_weights + 4 * h, h_stride);
  }
}
|
|
|
|
|
|
|
|
|
|
int ResizeBicubic(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
|
|
|
|
|
const int *y_bottoms, const int *y_tops, const int *x_lefts, const int *x_rights,
|
|
|
|
|
const float *y_weights, const float *x_weights, float *line_buffer, const int h_begin,
|
|
|
|
|
const int h_end) {
|
|
|
|
|
if (input_data == NULL || output_data == NULL || input_shape == NULL || output_shape == NULL || y_bottoms == NULL ||
|
|
|
|
|
y_tops == NULL || x_lefts == NULL || x_rights == NULL || y_weights == NULL || x_weights == NULL) {
|
|
|
|
|
return NNACL_NULL_PTR;
|
|
|
|
|
}
|
|
|
|
|
int input_cube_per_batch = input_shape[1] * input_shape[2] * input_shape[3];
|
|
|
|
|
int output_cube_per_batch = output_shape[1] * output_shape[2] * input_shape[3];
|
|
|
|
|
for (int b = 0; b < input_shape[0]; b++) {
|
|
|
|
|
const float *input = input_data + b * input_cube_per_batch;
|
|
|
|
|
float *output = output_data + b * output_cube_per_batch;
|
|
|
|
|
Bicubic(input, output, input_shape, output_shape, y_bottoms, y_tops, x_lefts, x_rights, y_weights, x_weights,
|
|
|
|
|
line_buffer, h_begin, h_end);
|
|
|
|
|
}
|
|
|
|
|
return NNACL_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int CropAndResizeBilinear(const float *input_data, float *output_data, const int *input_shape, const int *output_shape,
|
|
|
|
|
const int *y_bottoms, const int *y_tops, const int *x_lefts, const int *x_rights,
|
|
|
|
|
const float *y_bottom_weights, const float *x_left_weights, float *line0, float *line1,
|
|
|
|
@ -260,17 +440,17 @@ int ResizeNearestNeighbor(const float *input_data, float *output_data, const int
|
|
|
|
|
float actual_y = calculate(y, input_shape[1], output_shape[1]);
|
|
|
|
|
int input_y;
|
|
|
|
|
if (align_corners) {
|
|
|
|
|
input_y = (int)(round(actual_y));
|
|
|
|
|
input_y = (int)(roundf(actual_y));
|
|
|
|
|
} else {
|
|
|
|
|
input_y = (int)(floor(actual_y));
|
|
|
|
|
input_y = (int)(floorf(actual_y));
|
|
|
|
|
}
|
|
|
|
|
for (int x = 0; x < output_shape[2]; x++) {
|
|
|
|
|
float actual_x = calculate(x, input_shape[2], output_shape[2]);
|
|
|
|
|
int input_x;
|
|
|
|
|
if (align_corners) {
|
|
|
|
|
input_x = (int)(round(actual_x));
|
|
|
|
|
input_x = (int)(roundf(actual_x));
|
|
|
|
|
} else {
|
|
|
|
|
input_x = (int)(floor(actual_x));
|
|
|
|
|
input_x = (int)(floorf(actual_x));
|
|
|
|
|
}
|
|
|
|
|
int in_offset = offset(input_shape, batch, input_y, input_x, 0);
|
|
|
|
|
int out_offset = offset(output_shape, batch, y, x, 0);
|
|
|
|
|