|
|
|
@ -791,11 +791,6 @@ int ElementDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int
|
|
|
|
|
int block_c8 = element_size - block_mod;
|
|
|
|
|
|
|
|
|
|
for (int index = 0; index < block_c8; index += C8NUM) {
|
|
|
|
|
for (int i = 0; i < C8NUM; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#ifdef ENABLE_NEON
|
|
|
|
|
float16x8_t vin0 = vld1q_f16(input0);
|
|
|
|
|
float16x8_t vin1 = vld1q_f16(input1);
|
|
|
|
@ -811,9 +806,6 @@ int ElementDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int
|
|
|
|
|
output += C8NUM;
|
|
|
|
|
}
|
|
|
|
|
for (int index = 0; index < block_mod; ++index) {
|
|
|
|
|
if (input1[index] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
output[index] = input0[index] / input1[index];
|
|
|
|
|
}
|
|
|
|
|
return NNACL_OK;
|
|
|
|
@ -830,11 +822,6 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i
|
|
|
|
|
#endif
|
|
|
|
|
if (param->in_elements_num0_ == 1) {
|
|
|
|
|
for (int index = 0; index < block_c8; index += C8NUM) {
|
|
|
|
|
for (int i = 0; i < C8NUM; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#ifdef ENABLE_NEON
|
|
|
|
|
float16x8_t vin0 = vin0_opt;
|
|
|
|
|
float16x8_t vin1 = vld1q_f16(input1);
|
|
|
|
@ -849,9 +836,6 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i
|
|
|
|
|
output += C8NUM;
|
|
|
|
|
}
|
|
|
|
|
for (int index = 0; index < block_mod; ++index) {
|
|
|
|
|
if (input1[index] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
output[index] = in0_opt / input1[index];
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
@ -886,11 +870,6 @@ int ElementDivReluFp16(float16_t *input0, float16_t *input1, float16_t *output,
|
|
|
|
|
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
|
|
#endif
|
|
|
|
|
for (int index = 0; index < block_c8; index += C8NUM) {
|
|
|
|
|
for (int i = 0; i < C8NUM; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#ifdef ENABLE_NEON
|
|
|
|
|
float16x8_t vin0 = vld1q_f16(input0);
|
|
|
|
|
float16x8_t vin1 = vld1q_f16(input1);
|
|
|
|
@ -928,11 +907,6 @@ int ElementOptDivReluFp16(float16_t *input0, float16_t *input1, float16_t *outpu
|
|
|
|
|
#endif
|
|
|
|
|
if (param->in_elements_num0_ == 1) {
|
|
|
|
|
for (int index = 0; index < block_c8; index += C8NUM) {
|
|
|
|
|
for (int i = 0; i < C8NUM; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#ifdef ENABLE_NEON
|
|
|
|
|
float16x8_t vin0 = vin0_opt;
|
|
|
|
|
float16x8_t vin1 = vld1q_f16(input1);
|
|
|
|
@ -985,11 +959,6 @@ int ElementDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output,
|
|
|
|
|
float16x8_t bounds = {6, 6, 6, 6, 6, 6, 6, 6};
|
|
|
|
|
#endif
|
|
|
|
|
for (int index = 0; index < block_c8; index += C8NUM) {
|
|
|
|
|
for (int i = 0; i < C8NUM; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#ifdef ENABLE_NEON
|
|
|
|
|
float16x8_t vin0 = vld1q_f16(input0);
|
|
|
|
|
float16x8_t vin1 = vld1q_f16(input1);
|
|
|
|
@ -1027,11 +996,6 @@ int ElementOptDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *outp
|
|
|
|
|
#endif
|
|
|
|
|
if (param->in_elements_num0_ == 1) {
|
|
|
|
|
for (int index = 0; index < block_c8; index += C8NUM) {
|
|
|
|
|
for (int i = 0; i < C8NUM; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#ifdef ENABLE_NEON
|
|
|
|
|
float16x8_t vin0 = vin0_opt;
|
|
|
|
|
float16x8_t vin1 = vld1q_f16(input1);
|
|
|
|
@ -1088,17 +1052,11 @@ int ElementFloorModFp16(float16_t *input0, float16_t *input1, float16_t *output,
|
|
|
|
|
int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
|
|
|
|
|
ArithmeticParameter *param) {
|
|
|
|
|
if (param->in_elements_num1_ == 1) {
|
|
|
|
|
if (input1[0] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
for (int i = 0; i < element_size; ++i) {
|
|
|
|
|
output[i] = input0[i] - floorf(input0[i] / input1[0]) * input1[0];
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
for (int i = 0; i < element_size; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -1107,9 +1065,6 @@ int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *outp
|
|
|
|
|
|
|
|
|
|
int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size) {
|
|
|
|
|
for (int i = 0; i < element_size; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
output[i] = floorf(input0[i] / input1[i]);
|
|
|
|
|
}
|
|
|
|
|
return NNACL_OK;
|
|
|
|
@ -1117,17 +1072,11 @@ int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output,
|
|
|
|
|
int ElementOptFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
|
|
|
|
|
ArithmeticParameter *param) {
|
|
|
|
|
if (param->in_elements_num1_ == 1) {
|
|
|
|
|
if (input1[0] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
for (int i = 0; i < element_size; ++i) {
|
|
|
|
|
output[i] = floorf(input0[i] / input1[0]);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
for (int i = 0; i < element_size; ++i) {
|
|
|
|
|
if (input1[i] == 0) {
|
|
|
|
|
return NNACL_ERRCODE_DIVISOR_ZERO;
|
|
|
|
|
}
|
|
|
|
|
output[i] = floorf(input0[i] / input1[i]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|