!6279 [MSLITE][Develop] Fix relu fp16 multiple threads problem

Merge pull request !6279 from sunsuodong/fix_relu_fp16
pull/6279/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 716f6bfdd8

@@ -18,24 +18,17 @@
#include "nnacl/errorcode.h" #include "nnacl/errorcode.h"
int ReluFp16(const float16_t *src, float16_t *dst, int ele_num) { int ReluFp16(const float16_t *src, float16_t *dst, int ele_num) {
int eight_block = UP_DIV(ele_num, C8NUM); int offset = 0;
int i;
for (i = 0; i < eight_block - 1; i++) {
int index = i * C8NUM;
#ifdef ENABLE_NEON #ifdef ENABLE_NEON
float16x8_t relu_src = vld1q_f16(src + index); float16x8_t zero = vdupq_n_f16(0);
float16x8_t zero_src = vdupq_n_f16(0); for (; offset <= ele_num - C8NUM; offset += C8NUM) {
relu_src = vmaxq_f16(relu_src, zero_src); float16x8_t src_value = vld1q_f16(src + offset);
vst1q_f16(dst + index, relu_src); float16x8_t rst_value = vmaxq_f16(src_value, zero);
#else vst1q_f16(dst + offset, rst_value);
int j;
for (j = 0; j < C8NUM; j++) {
dst[index + j] = src[index + j] < 0 ? 0 : src[index + j];
}
#endif
} }
for (int j = (eight_block - 1) * C8NUM; j < ele_num; ++j) { #endif
dst[j] = src[j] < 0 ? 0 : src[j]; for (; offset < ele_num; offset++) {
dst[offset] = src[offset] < 0 ? 0 : src[offset];
} }
return NNACL_OK; return NNACL_OK;
} }

Loading…
Cancel
Save