From 28ce5c64f92987a8cbf7a42845a75d1813d3df83 Mon Sep 17 00:00:00 2001 From: tao_yunhao Date: Tue, 15 Sep 2020 16:27:43 +0800 Subject: [PATCH] optimize cpu op div --- mindspore/lite/nnacl/fp16/arithmetic_fp16.c | 51 --------------------- mindspore/lite/nnacl/fp32/arithmetic.c | 30 ------------ 2 files changed, 81 deletions(-) diff --git a/mindspore/lite/nnacl/fp16/arithmetic_fp16.c b/mindspore/lite/nnacl/fp16/arithmetic_fp16.c index a4d6060c78..bae68672fa 100644 --- a/mindspore/lite/nnacl/fp16/arithmetic_fp16.c +++ b/mindspore/lite/nnacl/fp16/arithmetic_fp16.c @@ -791,11 +791,6 @@ int ElementDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int int block_c8 = element_size - block_mod; for (int index = 0; index < block_c8; index += C8NUM) { - for (int i = 0; i < C8NUM; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } - } #ifdef ENABLE_NEON float16x8_t vin0 = vld1q_f16(input0); float16x8_t vin1 = vld1q_f16(input1); @@ -811,9 +806,6 @@ int ElementDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int output += C8NUM; } for (int index = 0; index < block_mod; ++index) { - if (input1[index] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[index] = input0[index] / input1[index]; } return NNACL_OK; @@ -830,11 +822,6 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i #endif if (param->in_elements_num0_ == 1) { for (int index = 0; index < block_c8; index += C8NUM) { - for (int i = 0; i < C8NUM; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } - } #ifdef ENABLE_NEON float16x8_t vin0 = vin0_opt; float16x8_t vin1 = vld1q_f16(input1); @@ -849,9 +836,6 @@ int ElementOptDivFp16(float16_t *input0, float16_t *input1, float16_t *output, i output += C8NUM; } for (int index = 0; index < block_mod; ++index) { - if (input1[index] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[index] = in0_opt / input1[index]; } } else { @@ -886,11 +870,6 @@ int ElementDivReluFp16(float16_t *input0, float16_t *input1, float16_t *output, float16x8_t zeros = {0, 0, 0, 0, 0, 0, 0, 0}; #endif for (int index = 0; index < block_c8; index += C8NUM) { - for (int i = 0; i < C8NUM; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } - } #ifdef ENABLE_NEON float16x8_t vin0 = vld1q_f16(input0); float16x8_t vin1 = vld1q_f16(input1); @@ -928,11 +907,6 @@ int ElementOptDivReluFp16(float16_t *input0, float16_t *input1, float16_t *outpu #endif if (param->in_elements_num0_ == 1) { for (int index = 0; index < block_c8; index += C8NUM) { - for (int i = 0; i < C8NUM; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } - } #ifdef ENABLE_NEON float16x8_t vin0 = vin0_opt; float16x8_t vin1 = vld1q_f16(input1); @@ -985,11 +959,6 @@ int ElementDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *output, float16x8_t bounds = {6, 6, 6, 6, 6, 6, 6, 6}; #endif for (int index = 0; index < block_c8; index += C8NUM) { - for (int i = 0; i < C8NUM; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } - } #ifdef ENABLE_NEON float16x8_t vin0 = vld1q_f16(input0); float16x8_t vin1 = vld1q_f16(input1); @@ -1027,11 +996,6 @@ int ElementOptDivRelu6Fp16(float16_t *input0, float16_t *input1, float16_t *outp #endif if (param->in_elements_num0_ == 1) { for (int index = 0; index < block_c8; index += C8NUM) { - for (int i = 0; i < C8NUM; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } - } #ifdef ENABLE_NEON float16x8_t vin0 = vin0_opt; float16x8_t vin1 = vld1q_f16(input1); @@ -1088,17 +1052,11 @@ int ElementFloorModFp16(float16_t *input0, float16_t *input1, float16_t *output, int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size, ArithmeticParameter *param) { if (param->in_elements_num1_ == 1) { - if (input1[0] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } for (int i = 0; i < element_size; ++i) { output[i] = input0[i] - floorf(input0[i] / input1[0]) * input1[0]; } } else { for (int i = 0; i < element_size; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; } } @@ -1107,9 +1065,6 @@ int ElementOptFloorModFp16(float16_t *input0, float16_t *input1, float16_t *outp int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size) { for (int i = 0; i < element_size; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[i] = floorf(input0[i] / input1[i]); } return NNACL_OK; @@ -1117,17 +1072,11 @@ int ElementFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int ElementOptFloorDivFp16(float16_t *input0, float16_t *input1, float16_t *output, int element_size, ArithmeticParameter *param) { if (param->in_elements_num1_ == 1) { - if (input1[0] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } for (int i = 0; i < element_size; ++i) { output[i] = floorf(input0[i] / input1[0]); } } else { for (int i = 0; i < element_size; ++i) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[i] = floorf(input0[i] / input1[i]); } } diff --git a/mindspore/lite/nnacl/fp32/arithmetic.c b/mindspore/lite/nnacl/fp32/arithmetic.c index 669c576eed..65a733897c 100644 --- a/mindspore/lite/nnacl/fp32/arithmetic.c +++ b/mindspore/lite/nnacl/fp32/arithmetic.c @@ -473,9 +473,6 @@ int ElementOptAddRelu6(float *input0, float *input1, float *output, int element_ int ElementOptDiv(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { for (int index = 0; index < element_size; ++index) { - if (input1[index] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[index] = input0[0] / input1[index]; } } else { @@ -492,16 +489,10 @@ int ElementOptDiv(float *input0, float *input1, float *output, int element_size, int ElementOptDivRelu(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { for (int index = 0; index < element_size; ++index) { - if (input1[index] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[index] = input0[0] / input1[index]; output[index] = output[index] > 0 ? output[index] : 0; } } else { - if (input1[0] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } for (int index = 0; index < element_size; ++index) { output[index] = input0[index] / input1[0]; output[index] = output[index] > 0 ? output[index] : 0; @@ -513,15 +504,9 @@ int ElementOptDivRelu(float *input0, float *input1, float *output, int element_s int ElementOptDivRelu6(float *input0, float *input1, float *output, int element_size, ArithmeticParameter *param) { if (param->in_elements_num0_ == 1) { for (int index = 0; index < element_size; ++index) { - if (input1[index] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[index] = MSMIN(MSMAX(input0[0] / input1[index], 0), 6); } } else { - if (input1[0] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } for (int index = 0; index < element_size; ++index) { output[index] = MSMIN(MSMAX(input0[index] / input1[0], 0), 6); } @@ -840,9 +825,6 @@ int BroadcastSub(float *input0, float *input1, float *tile_input0, float *tile_i int ElementDiv(float *input0, float *input1, float *output, int element_size) { for (int i = 0; i < element_size; i++) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[i] = input0[i] / input1[i]; } return NNACL_OK; @@ -850,9 +832,6 @@ int ElementDiv(float *input0, float *input1, float *output, int element_size) { int ElementDivRelu(float *input0, float *input1, float *output, int element_size) { for (int i = 0; i < element_size; i++) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } float res = input0[i] / input1[i]; output[i] = res > 0 ? res : 0; } @@ -861,9 +840,6 @@ int ElementDivRelu(float *input0, float *input1, float *output, int element_size int ElementDivRelu6(float *input0, float *input1, float *output, int element_size) { for (int i = 0; i < element_size; i++) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[i] = MSMIN(MSMAX(input0[i] / input1[i], 0), 6); } return NNACL_OK; @@ -877,9 +853,6 @@ int BroadcastDiv(float *input0, float *input1, float *tile_input0, float *tile_i int ElementFloorMod(float *input0, float *input1, float *output, int element_size) { for (int i = 0; i < element_size; i++) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[i] = input0[i] - floorf(input0[i] / input1[i]) * input1[i]; } return NNACL_OK; @@ -893,9 +866,6 @@ int BroadcastFloorMod(float *input0, float *input1, float *tile_input0, float *t int ElementFloorDiv(float *input0, float *input1, float *output, int element_size) { for (int i = 0; i < element_size; i++) { - if (input1[i] == 0) { - return NNACL_ERRCODE_DIVISOR_ZERO; - } output[i] = floorf(input0[i] / input1[i]); } return NNACL_OK;