!6262 optimize cpu op div

Merge pull request !6262 from 陶云浩/lite
pull/6262/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 88dfcda3e6

@ -791,11 +791,6 @@ int ElementDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int
int block_c8 = element_size - block_mod;
for (int index = 0; index < block_c8; index += C8NUM) {
for (int i = 0; i < C8NUM; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
}
#ifdef ENABLE_NEON
float16x8_t vin0 = vld1q_f16(input0);
float16x8_t vin1 = vld1q_f16(input1);
@ -811,9 +806,6 @@ int ElementDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int
output += C8NUM;
}
for (int index = 0; index < block_mod; ++index) {
if (input1[index] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[index] = input0[index] / input1[index];
}
return NNACL_OK;
@ -830,11 +822,6 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i
#endif
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < block_c8; index += C8NUM) {
for (int i = 0; i < C8NUM; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
}
#ifdef ENABLE_NEON
float16x8_t vin0 = vin0_opt;
float16x8_t vin1 = vld1q_f16(input1);
@ -849,9 +836,6 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i
output += C8NUM;
}
for (int index = 0; index < block_mod; ++index) {
if (input1[index] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[index] = in0_opt / input1[index];
}
} else {
@ -886,11 +870,6 @@ int ElementDivReluFp16(float16_t *input0, float16_t *input1, float16_t *output,
float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0};
#endif
for (int index = 0; index < block_c8; index += C8NUM) {
for (int i = 0; i < C8NUM; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
}
#ifdef ENABLE_NEON
float16x8_t vin0 = vld1q_f16(input0);
float16x8_t vin1 = vld1q_f16(input1);
@ -928,11 +907,6 @@ int ElementOptDivReluFp16(float16_t *input0, float16_t *input1, float16_t *outpu
#endif
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < block_c8; index += C8NUM) {
for (int i = 0; i < C8NUM; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
}
#ifdef ENABLE_NEON
float16x8_t vin0 = vin0_opt;
float16x8_t vin1 = vld1q_f16(input1);
@ -985,11 +959,6 @@ int ElementDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output,
float16x8_t bounds = {6, 6, 6, 6, 6, 6, 6, 6};
#endif
for (int index = 0; index < block_c8; index += C8NUM) {
for (int i = 0; i < C8NUM; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
}
#ifdef ENABLE_NEON
float16x8_t vin0 = vld1q_f16(input0);
float16x8_t vin1 = vld1q_f16(input1);
@ -1027,11 +996,6 @@ int ElementOptDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *outp
#endif
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < block_c8; index += C8NUM) {
for (int i = 0; i < C8NUM; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
}
#ifdef ENABLE_NEON
float16x8_t vin0 = vin0_opt;
float16x8_t vin1 = vld1q_f16(input1);
@ -1088,17 +1052,11 @@ int ElementFloorModFp16(float16_t *input0, float16_t *input1, float16_t *output,
int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
ArithmeticParameter *param) {
if (param->in_elements_num1_ == 1) {
if (input1[0] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
for (int i = 0; i < element_size; ++i) {
output[i] = input0[i] - floorf(input0[i] / input1[0]) * input1[0];
}
} else {
for (int i = 0; i < element_size; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i];
}
}
@ -1107,9 +1065,6 @@ int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *outp
int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size) {
for (int i = 0; i < element_size; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[i] = floorf(input0[i] / input1[i]);
}
return NNACL_OK;
@ -1117,17 +1072,11 @@ int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output,
int ElementOptFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size,
ArithmeticParameter *param) {
if (param->in_elements_num1_ == 1) {
if (input1[0] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
for (int i = 0; i < element_size; ++i) {
output[i] = floorf(input0[i] / input1[0]);
}
} else {
for (int i = 0; i < element_size; ++i) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[i] = floorf(input0[i] / input1[i]);
}
}

@ -473,9 +473,6 @@ int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_
int ElementOptDiv(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < element_size; ++index) {
if (input1[index] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[index] = input0[0] / input1[index];
}
} else {
@ -492,16 +489,10 @@ int ElementOptDiv(float *input0, float *input1, float *output, int element_size,
int ElementOptDivRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < element_size; ++index) {
if (input1[index] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[index] = input0[0] / input1[index];
output[index] = output[index] > 0 ? output[index] : 0;
}
} else {
if (input1[0] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
for (int index = 0; index < element_size; ++index) {
output[index] = input0[index] / input1[0];
output[index] = output[index] > 0 ? output[index] : 0;
@ -513,15 +504,9 @@ int ElementOptDivRelu(float *input0, float *input1, float *output, int element_s
int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) {
if (param->in_elements_num0_ == 1) {
for (int index = 0; index < element_size; ++index) {
if (input1[index] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[index] = MSMIN(MSMAX(input0[0] / input1[index], 0), 6);
}
} else {
if (input1[0] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
for (int index = 0; index < element_size; ++index) {
output[index] = MSMIN(MSMAX(input0[index] / input1[0], 0), 6);
}
@ -840,9 +825,6 @@ int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_i
int ElementDiv(float *input0, float *input1, float *output, int element_size) {
for (int i = 0; i < element_size; i++) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[i] = input0[i] / input1[i];
}
return NNACL_OK;
@ -850,9 +832,6 @@ int ElementDiv(float *input0, float *input1, float *output, int element_size) {
int ElementDivRelu(float *input0, float *input1, float *output, int element_size) {
for (int i = 0; i < element_size; i++) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
float res = input0[i] / input1[i];
output[i] = res > 0 ? res : 0;
}
@ -861,9 +840,6 @@ int ElementDivRelu(float *input0, float *input1, float *output, int element_size
int ElementDivRelu6(float *input0, float *input1, float *output, int element_size) {
for (int i = 0; i < element_size; i++) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[i] = MSMIN(MSMAX(input0[i] / input1[i], 0), 6);
}
return NNACL_OK;
@ -877,9 +853,6 @@ int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_i
int ElementFloorMod(float *input0, float *input1, float *output, int element_size) {
for (int i = 0; i < element_size; i++) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i];
}
return NNACL_OK;
@ -893,9 +866,6 @@ int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *t
int ElementFloorDiv(float *input0, float *input1, float *output, int element_size) {
for (int i = 0; i < element_size; i++) {
if (input1[i] == 0) {
return NNACL_ERRCODE_DIVISOR_ZERO;
}
output[i] = floorf(input0[i] / input1[i]);
}
return NNACL_OK;

Loading…
Cancel
Save