diff --git a/mindspore/lite/nnacl/fp16/arg_min_max_fp16.c b/mindspore/lite/nnacl/fp16/arg_min_max_fp16.c
index 7f7aef0b39..e3031197f2 100644
--- a/mindspore/lite/nnacl/fp16/arg_min_max_fp16.c
+++ b/mindspore/lite/nnacl/fp16/arg_min_max_fp16.c
@@ -42,15 +42,17 @@ int ArgCompareDescFp16(const void *a, const void *b) {
   return 0;
 }
 
-void ArgMaxTopK1Fp16(const float16_t *input, float16_t *output, float16_t *output_value,
-                     const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, int after_axis_count) {
+void ArgMaxTopK1Fp16(const float16_t *input, void *output, float16_t *output_value, const ArgMinMaxParameter *param,
+                     int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float16_t value = -FLT_MAX;
-      float16_t index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float16_t value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp > value) {
@@ -58,7 +60,11 @@ void ArgMaxTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp16[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -66,15 +72,17 @@ void ArgMaxTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
   }
 }
 
-void ArgMinTopK1Fp16(const float16_t *input, float16_t *output, float16_t *output_value,
-                     const ArgMinMaxParameter *param, int pre_axis_count, int axis_count, int after_axis_count) {
+void ArgMinTopK1Fp16(const float16_t *input, void *output, float16_t *output_value, const ArgMinMaxParameter *param,
+                     int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float16_t value = FLT_MAX;
-      float16_t index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float16_t value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp < value) {
@@ -82,7 +90,11 @@ void ArgMinTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp16[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -90,29 +102,37 @@ void ArgMinTopK1Fp16(const float16_t *input, float16_t *output, float16_t *outpu
   }
 }
 
-void ArgMinMaxDim0Fp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxDim0Fp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                        const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
     for (int j = 0; j < in_shape[0]; ++j) {
       size_t offset = param->in_strides_[0] * j + i;
       param->arg_elements_[j].index_ = j;
-      param->arg_elements_[j].data_.f_data_ = input[offset];
+      param->arg_elements_[j].data_.f16_data_ = input[offset];
     }
     qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), *compare_func);
     for (int j = 0; j < param->topk_; ++j) {
       size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->out_value_ ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
+      if (param->out_value_) {
+        outputfp16[out_offset] = param->arg_elements_[j].data_.f16_data_;
+      } else {
+        outputint[out_offset] = param->arg_elements_[j].index_;
+      }
       if (output_value != NULL) {
-        output_value[out_offset] = param->arg_elements_[j].data_.f_data_;
+        output_value[out_offset] = param->arg_elements_[j].data_.f16_data_;
       }
     }
   }
   return;
 }
 
-void ArgMinMaxDim1Fp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxDim1Fp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                        const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -120,14 +140,18 @@ void ArgMinMaxDim1Fp16(const float16_t *input, float16_t *output, float16_t *out
       for (int k = 0; k < in_shape1; ++k) {
         size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
         param->arg_elements_[k].index_ = k;
-        param->arg_elements_[k].data_.f_data_ = input[offset];
+        param->arg_elements_[k].data_.f16_data_ = input[offset];
       }
       qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), *compare_func);
       for (int k = 0; k < param->topk_; ++k) {
         size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->out_value_ ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
+        if (param->out_value_) {
+          outputfp16[out_offset] = param->arg_elements_[k].data_.f16_data_;
+        } else {
+          outputint[out_offset] = param->arg_elements_[k].index_;
+        }
         if (output_value != NULL) {
-          output_value[out_offset] = param->arg_elements_[k].data_.f_data_;
+          output_value[out_offset] = param->arg_elements_[k].data_.f16_data_;
         }
       }
     }
@@ -139,6 +163,8 @@ void ArgMinMaxDim2Fp16(const float16_t *input, float16_t *output, float16_t *out
                        const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -149,16 +175,18 @@ void ArgMinMaxDim2Fp16(const float16_t *input, float16_t *output, float16_t *out
         for (int l = 0; l < in_shape2; ++l) {
           size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
           param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
+          param->arg_elements_[l].data_.f16_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
           size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-
-          output[out_offset] =
-            param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+          if (param->out_value_) {
+            outputfp16[out_offset] = param->arg_elements_[l].data_.f16_data_;
+          } else {
+            outputint[out_offset] = param->arg_elements_[l].index_;
+          }
           if (output_value != NULL) {
-            output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
+            output_value[out_offset] = param->arg_elements_[l].data_.f16_data_;
           }
         }
       }
@@ -171,6 +199,8 @@ void ArgMinMaxDim3Fp16(const float16_t *input, float16_t *output, float16_t *out
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
   int in_shape3 = in_shape[3];
+  float16_t *outputfp16 = (float16_t *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -183,15 +213,18 @@ void ArgMinMaxDim3Fp16(const float16_t *input, float16_t *output, float16_t *out
         for (int l = 0; l < in_shape3; ++l) {
           size_t offset = l + in_dim2_offset;
           param->arg_elements_[l].index_ = l;
-          param->arg_elements_[l].data_.f_data_ = input[offset];
+          param->arg_elements_[l].data_.f16_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
           size_t out_offset = out_dim2_offset + l;
-          output[out_offset] =
-            param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+          if (param->out_value_) {
+            outputfp16[out_offset] = param->arg_elements_[l].data_.f16_data_;
+          } else {
+            outputint[out_offset] = param->arg_elements_[l].index_;
+          }
           if (output_value != NULL) {
-            output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
+            output_value[out_offset] = param->arg_elements_[l].data_.f16_data_;
           }
         }
       }
@@ -199,7 +232,7 @@ void ArgMinMaxDim3Fp16(const float16_t *input, float16_t *output, float16_t *out
   }
 }
 
-void ArgMinMaxFp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxFp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param) {
   if (param->topk_ == 1) {
     int pre_axis_count = 1;
diff --git a/mindspore/lite/nnacl/fp16/arg_min_max_fp16.h b/mindspore/lite/nnacl/fp16/arg_min_max_fp16.h
index a7fecebdbe..e969b1e627 100644
--- a/mindspore/lite/nnacl/fp16/arg_min_max_fp16.h
+++ b/mindspore/lite/nnacl/fp16/arg_min_max_fp16.h
@@ -23,7 +23,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void ArgMinMaxFp16(const float16_t *input, float16_t *output, float16_t *output_value, const int *in_shape,
+void ArgMinMaxFp16(const float16_t *input, void *output, float16_t *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param);
 #ifdef __cplusplus
 }
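The fp16 kernels above (and their fp32 twins below) change the output parameter to void * because one buffer is now written with two different element types: values in the input's float type when out_value_ is set, and int32 indices otherwise. The old code stored indices in the float type itself, which is lossy for fp16 once the axis index exceeds 2048. A minimal sketch of the new output contract, using a hypothetical single-row fp32 helper rather than the real kernels:

/* Hypothetical stand-in for the patched TopK1 logic on one fp32 row. */
#include <stdint.h>
#include <stdio.h>

static void arg_max_row(const float *row, int len, int out_value, void *output) {
  float best = row[0];
  int best_idx = 0;
  for (int k = 1; k < len; ++k) {
    if (row[k] > best) {
      best = row[k];
      best_idx = k;
    }
  }
  if (out_value) {
    ((float *)output)[0] = best;       /* value output keeps the float type */
  } else {
    ((int32_t *)output)[0] = best_idx; /* index output is int32, no longer a float */
  }
}

int main(void) {
  const float row[4] = {0.5f, 2.0f, 1.5f, -1.0f};
  int32_t idx;
  float val;
  arg_max_row(row, 4, 0, &idx);
  arg_max_row(row, 4, 1, &val);
  printf("argmax index=%d value=%f\n", idx, val); /* prints index=1 value=2.0 */
  return 0;
}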
diff --git a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c
index dd4024d49c..21ea965808 100644
--- a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c
+++ b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.c
@@ -43,15 +43,17 @@ int ArgCompareDescFp32(const void *a, const void *b) {
   return 0;
 }
 
-void ArgMaxTopK1(const float *input, float *output, float *output_value, const ArgMinMaxParameter *param,
+void ArgMaxTopK1(const float *input, void *output, float *output_value, const ArgMinMaxParameter *param,
                  int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = -FLT_MAX;
-      float index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp > value) {
@@ -59,7 +61,11 @@ void ArgMaxTopK1(const float *input, float *output, float *output_value, const A
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp32[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -67,15 +73,17 @@ void ArgMaxTopK1(const float *input, float *output, float *output_value, const A
   }
 }
 
-void ArgMinTopK1(const float *input, float *output, float *output_value, const ArgMinMaxParameter *param,
+void ArgMinTopK1(const float *input, void *output, float *output_value, const ArgMinMaxParameter *param,
                  int pre_axis_count, int axis_count, int after_axis_count) {
   bool out_value = param->out_value_;
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
     size_t output_offset = i * after_axis_count;
     size_t input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = FLT_MAX;
-      float index = 0.0f;
+      int index = 0;
       for (int k = 0; k < axis_count; ++k) {
         float value_tmp = input[input_offset + k * after_axis_count + j];
         if (value_tmp < value) {
@@ -83,7 +91,11 @@ void ArgMinTopK1(const float *input, float *output, float *output_value, const A
           index = k;
         }
       }
-      output[output_offset + j] = out_value ? value : index;
+      if (out_value) {
+        outputfp32[output_offset + j] = value;
+      } else {
+        outputint[output_offset + j] = index;
+      }
       if (output_value != NULL) {
         output_value[output_offset + j] = value;
       }
@@ -91,8 +103,10 @@ void ArgMinTopK1(const float *input, float *output, float *output_value, const A
   }
 }
 
-void ArgMinMaxDim0(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim0(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
     for (int j = 0; j < in_shape[0]; ++j) {
       size_t offset = param->in_strides_[0] * j + i;
@@ -102,7 +116,11 @@ void ArgMinMaxDim0(const float *input, float *output, float *output_value, const
     qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), *compare_func);
     for (int j = 0; j < param->topk_; ++j) {
       size_t out_offset = j * param->out_strides_[0] + i;
-      output[out_offset] = param->out_value_ ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
+      if (param->out_value_) {
+        outputfp32[out_offset] = param->arg_elements_[j].data_.f_data_;
+      } else {
+        outputint[out_offset] = param->arg_elements_[j].index_;
+      }
       if (output_value != NULL) {
         output_value[out_offset] = param->arg_elements_[j].data_.f_data_;
       }
@@ -111,8 +129,10 @@ void ArgMinMaxDim0(const float *input, float *output, float *output_value, const
   return;
 }
 
-void ArgMinMaxDim1(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim1(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   int in_shape1 = in_shape[1];
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -126,7 +146,11 @@ void ArgMinMaxDim1(const float *input, float *output, float *output_value, const
       qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), *compare_func);
       for (int k = 0; k < param->topk_; ++k) {
         size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
-        output[out_offset] = param->out_value_ ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
+        if (param->out_value_) {
+          outputfp32[out_offset] = param->arg_elements_[k].data_.f_data_;
+        } else {
+          outputint[out_offset] = param->arg_elements_[k].index_;
+        }
         if (output_value != NULL) {
           output_value[out_offset] = param->arg_elements_[k].data_.f_data_;
         }
@@ -136,10 +160,12 @@ void ArgMinMaxDim1(const float *input, float *output, float *output_value, const
   return;
 }
 
-void ArgMinMaxDim2(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim2(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -155,9 +181,11 @@ void ArgMinMaxDim2(const float *input, float *output, float *output_value, const
       qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), *compare_func);
       for (int l = 0; l < param->topk_; ++l) {
         size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
-
-        output[out_offset] =
-          param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+        if (param->out_value_) {
+          outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
+        } else {
+          outputint[out_offset] = param->arg_elements_[l].index_;
+        }
         if (output_value != NULL) {
           output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
         }
@@ -167,11 +195,13 @@ void ArgMinMaxDim2(const float *input, float *output, float *output_value, const
   }
 }
 
-void ArgMinMaxDim3(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxDim3(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param, COMPARE_FUNCTION compare_func) {
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
   int in_shape3 = in_shape[3];
+  float *outputfp32 = (float *)output;
+  int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
     size_t in_dim0_offset = i * param->in_strides_[0];
     size_t out_dim0_offset = i * param->out_strides_[0];
@@ -189,8 +219,11 @@ void ArgMinMaxDim3(const float *input, float *output, float *output_value, const
       qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), *compare_func);
       for (int l = 0; l < param->topk_; ++l) {
         size_t out_offset = out_dim2_offset + l;
-        output[out_offset] =
-          param->out_value_ ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
+        if (param->out_value_) {
+          outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
+        } else {
+          outputint[out_offset] = param->arg_elements_[l].index_;
+        }
         if (output_value != NULL) {
           output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
         }
@@ -200,7 +233,7 @@ void ArgMinMaxDim3(const float *input, float *output, float *output_value, const
   }
 }
 
-void ArgMinMaxFp32(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxFp32(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param) {
   if (param->topk_ == 1) {
     int pre_axis_count = 1;
diff --git a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h
index 18146432c6..509e85203f 100644
--- a/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h
+++ b/mindspore/lite/nnacl/fp32/arg_min_max_fp32.h
@@ -22,7 +22,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void ArgMinMaxFp32(const float *input, float *output, float *output_value, const int *in_shape,
+void ArgMinMaxFp32(const float *input, void *output, float *output_value, const int *in_shape,
                    const ArgMinMaxParameter *param);
 #ifdef __cplusplus
 }
diff --git a/mindspore/lite/nnacl/fp32/instance_norm_fp32.c b/mindspore/lite/nnacl/fp32/instance_norm_fp32.c
index 87fa299ee3..aaaf42c5bb 100644
--- a/mindspore/lite/nnacl/fp32/instance_norm_fp32.c
+++ b/mindspore/lite/nnacl/fp32/instance_norm_fp32.c
@@ -33,21 +33,26 @@ int InstanceNorm(const float *src_data, float *dst_data, const float *gamma_data
   for (int c = channel_begin; c < channel_end; c++) {
     const float *src = src_b + c * param->inner_size_;
     float *dst = dst_b + c * param->inner_size_;
-    float mean = 0.0f;
-    float square_mean = 0.0f;
+    double mean = 0.0f;
+    double square_mean = 0.0f;
     int index = 0;
 #ifdef ENABLE_NEON
-    float32x4_t sum = vdupq_n_f32(0);
-    float32x4_t square_sum = vdupq_n_f32(0);
     for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
       float32x4_t srcv = vld1q_f32(src + index);
       float32x4_t squarev = vmulq_f32(srcv, srcv);
-      sum = vaddq_f32(sum, srcv);
-      square_sum = vaddq_f32(square_sum, squarev);
+#ifdef ENABLE_ARM64
+      mean += vaddvq_f32(srcv);
+      square_mean += vaddvq_f32(squarev);
+#else
+      float32x2_t src_add2 = vadd_f32(vget_low_f32(srcv), vget_high_f32(srcv));
+      float32x2_t src_add4 = vpadd_f32(src_add2, src_add2);
+      mean += vget_lane_f32(src_add4, 0);
+      float32x2_t square_add2 = vadd_f32(vget_low_f32(squarev), vget_high_f32(squarev));
+      float32x2_t square_add4 = vpadd_f32(square_add2, square_add2);
+      square_mean += vget_lane_f32(square_add4, 0);
+#endif
     }
-    mean = sum[0] + sum[1] + sum[2] + sum[3];
-    square_mean = square_sum[0] + square_sum[1] + square_sum[2] + square_sum[3];
 #endif
     for (; index < param->inner_size_; index++) {
       mean += src[index];
@@ -56,27 +61,11 @@
     mean /= (float)param->inner_size_;
     square_mean /= (float)param->inner_size_;
-    const float deno = 1 / sqrtf(square_mean - mean * mean + param->epsilon_);
+    const double deno = gamma_data[c] / sqrt(square_mean - mean * mean + param->epsilon_);
     index = 0;
-#ifdef ENABLE_NEON
-    float32x4_t meanv = vdupq_n_f32(mean);
-    float32x4_t denov = vdupq_n_f32(deno);
-    for (; index < param->inner_size_ - C4NUM; index += C4NUM) {
-      float32x4_t srcv = vld1q_f32(src + index);
-      float32x4_t outv = vsubq_f32(srcv, meanv);
-      outv = vmulq_f32(outv, denov);
-
-      float32x4_t gammav = vdupq_n_f32(gamma_data[c]);
-      float32x4_t betav = vdupq_n_f32(beta_data[c]);
-      outv = vmulq_f32(outv, gammav);
-      outv = vaddq_f32(outv, betav);
-      vst1q_f32(dst + index, outv);
-    }
-#endif
     for (; index < param->inner_size_; index++) {
-      dst[index] = (src[index] - mean) * deno;
-      dst[index] = dst[index] * gamma_data[c] + beta_data[c];
+      dst[index] = (src[index] - mean) * deno + beta_data[c];
     }
   }
 }
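The instance-norm rewrite above does two things: the mean and square-mean accumulators are widened to double, and gamma_data[c] is folded into the per-channel denominator so the inner loop collapses to one fused multiply-add (the hand-written NEON normalize loop is dropped and left to the compiler). A plain-C sketch of the folded math, with hypothetical shapes and names:

#include <math.h>
#include <stddef.h>

/* Normalize one channel of n elements: dst = (src - mean) * (gamma / stddev) + beta. */
static void instance_norm_channel(const float *src, float *dst, size_t n,
                                  float gamma, float beta, float epsilon) {
  double mean = 0.0, square_mean = 0.0; /* double accumulators, as in the patch */
  for (size_t i = 0; i < n; ++i) {
    mean += src[i];
    square_mean += (double)src[i] * src[i];
  }
  mean /= (double)n;
  square_mean /= (double)n;
  /* gamma is folded into the denominator: one division per channel, not per element */
  const double deno = gamma / sqrt(square_mean - mean * mean + epsilon);
  for (size_t i = 0; i < n; ++i) {
    dst[i] = (float)((src[i] - mean) * deno + beta);
  }
}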
diff --git a/mindspore/lite/src/ops/addn.cc b/mindspore/lite/src/ops/addn.cc
index 20bbd6eeb3..387c38b62f 100644
--- a/mindspore/lite/src/ops/addn.cc
+++ b/mindspore/lite/src/ops/addn.cc
@@ -86,9 +86,10 @@ int AddN::InferShape(std::vector inputs, std::vector outputs
   for (size_t i = 1; i < inputs.size(); ++i) {
     if (inputs.at(i)->shape().size() > max_dims) {
       max_dims = inputs.at(i)->shape().size();
-      max_dims_idx = 0;
+      max_dims_idx = i;
     }
   }
+  output->set_shape(inputs.at(max_dims_idx)->shape());
 
   // make sure all elements have the same size or 1 (broadcasting) in all dimensions
diff --git a/mindspore/lite/src/ops/argmax.cc b/mindspore/lite/src/ops/argmax.cc
index e78ba4160c..b189b3d1d6 100644
--- a/mindspore/lite/src/ops/argmax.cc
+++ b/mindspore/lite/src/ops/argmax.cc
@@ -95,13 +95,17 @@ int ArgMax::InferShape(std::vector inputs_, std::vector outp
   MS_ASSERT(input != nullptr);
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
-  if (inputs_.size() != kSingleNum || outputs_.size() != kSingleNum) {
+  if (inputs_.size() != kSingleNum || outputs_.size() > kDoubleNum) {
     MS_LOG(ERROR) << "tensor number is error.";
     return RET_ERROR;
   }
   output->set_format(input->format());
-  output->set_data_type(input->data_type());
+  if (GetOutMaxValue() && outputs_.size() == kSingleNum) {
+    output->set_data_type(input->data_type());
+  } else {
+    output->set_data_type(kNumberTypeInt32);
+  }
   if (!infer_flag()) {
     return RET_INFER_INVALID;
   }
@@ -119,6 +123,11 @@ int ArgMax::InferShape(std::vector inputs_, std::vector outp
   }
   output->set_shape(output_shape);
+  if (outputs_.size() == kDoubleNum) {
+    outputs_.at(1)->set_format(input->format());
+    outputs_.at(1)->set_data_type(input->data_type());
+    outputs_.at(1)->set_shape(output_shape);
+  }
   return RET_OK;
 }
 }  // namespace lite
diff --git a/mindspore/lite/src/ops/argmin.cc b/mindspore/lite/src/ops/argmin.cc
index daf856a21d..cf9e7d5d4e 100644
--- a/mindspore/lite/src/ops/argmin.cc
+++ b/mindspore/lite/src/ops/argmin.cc
@@ -101,7 +101,11 @@ int ArgMin::InferShape(std::vector inputs_, std::vector outp
   output->set_format(input->format());
-  output->set_data_type(input->data_type());
+  if (GetOutMaxValue() && outputs_.size() == kSingleNum) {
+    output->set_data_type(input->data_type());
+  } else {
+    output->set_data_type(kNumberTypeInt32);
+  }
   if (!infer_flag()) {
     return RET_INFER_INVALID;
   }
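Both InferShape changes above implement the same typing rule: the first output carries int32 indices unless the node is value-only (out-max-value requested and a single output), and a second output, when present, carries values in the input's type. A small sketch of that decision with hypothetical enums, since the real code works on tensor objects:

typedef enum { kTypeFloat32, kTypeFloat16, kTypeInt32 } DataType;

/* Type of the first output of an ArgMax/ArgMin node. */
static DataType arg_first_output_type(DataType input_type, int out_max_value,
                                      int output_num) {
  if (out_max_value && output_num == 1) {
    return input_type; /* single output holding the extreme values themselves */
  }
  return kTypeInt32;   /* otherwise the first output is indices */
}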
diff --git a/mindspore/lite/src/ops/concat.cc b/mindspore/lite/src/ops/concat.cc
index 3f8bfb5a55..45eff714a1 100644
--- a/mindspore/lite/src/ops/concat.cc
+++ b/mindspore/lite/src/ops/concat.cc
@@ -116,6 +116,12 @@ int Concat::InferShape(std::vector inputs_, std::vector outp
       MS_LOG(ERROR) << "All inputs should have the same dim num!";
       return RET_PARAM_INVALID;
     }
+    if ((inputs_.at(i)->data_type() != output->data_type()) &&
+        !((inputs_.at(i)->data_type() == kNumberTypeFloat16 && output->data_type() == kNumberTypeFloat32) ||
+          (inputs_.at(i)->data_type() == kNumberTypeFloat32 && output->data_type() == kNumberTypeFloat16))) {
+      MS_LOG(ERROR) << "All inputs should have the same type!";
+      return RET_PARAM_INVALID;
+    }
     auto axis_tmp = shape_tmp[axis];
     shape_tmp.erase(shape_tmp.begin() + axis);
     if (input0_shape_without_axis != shape_tmp) {
diff --git a/mindspore/lite/src/ops/primitive_c.cc b/mindspore/lite/src/ops/primitive_c.cc
index a494113781..7125b11872 100644
--- a/mindspore/lite/src/ops/primitive_c.cc
+++ b/mindspore/lite/src/ops/primitive_c.cc
@@ -627,6 +627,8 @@ std::shared_ptr PrimitiveC::Create(const Primitive &prim, const std:
     return NewPrimitiveC(prim, inputs, quantType);
   } else if (op_type == "MirrorPad") {
     return NewPrimitiveC<MirrorPad>(prim, inputs, quantType);
+  } else if (op_type == "InstanceNorm") {
+    return NewPrimitiveC<InstanceNorm>(prim, inputs, quantType);
   } else if (op_type == "Gather") {
     return NewPrimitiveC<Gather>(prim, inputs, quantType);
   } else if (op_type == "OnesLike") {
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc
index 660997a61c..04ad4ea6e0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/argminmax_base.cc
@@ -72,11 +72,11 @@ int ArgMinMaxCPUKernel::Run() {
     }
   }
   if (input->data_type() == kNumberTypeFloat32) {
-    ArgMinMaxFp32(reinterpret_cast<float *>(input_data), reinterpret_cast<float *>(output_data),
+    ArgMinMaxFp32(reinterpret_cast<float *>(input_data), reinterpret_cast<void *>(output_data),
                   reinterpret_cast<float *>(output_value), shape.data(), arg_param_);
 #ifdef ENABLE_ARM64
   } else if (input->data_type() == kNumberTypeFloat16) {
-    ArgMinMaxFp16(reinterpret_cast<float16_t *>(input_data), reinterpret_cast<float16_t *>(output_data),
+    ArgMinMaxFp16(reinterpret_cast<float16_t *>(input_data), reinterpret_cast<void *>(output_data),
                   reinterpret_cast<float16_t *>(output_value), shape.data(), arg_param_);
 #endif
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
index d18c530fc6..8d8c62c89b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
@@ -65,7 +65,6 @@ int GatherNdCPUKernel::ReSize() {
     MS_LOG(ERROR) << "GatherNd Malloc in_offset_ error!";
     return RET_ERROR;
   }
-  (void)memset(in_offset_, 0, count_ * sizeof(int));
   thread_sz_count_ = MSMIN(thread_count_, count_);
   if (thread_sz_count_ != 0) {
     thread_sz_stride_ = UP_DIV(count_, thread_sz_count_);
@@ -93,6 +92,7 @@ void GatherNdCPUKernel::InitOffset() {
   }
   int idx_stride = idx_lastshape;
+  (void)memset(in_offset_, 0, count_ * sizeof(int));
   for (int j = 0; j < count_; ++j) {
     for (int k = 0; k < idx_lastshape; ++k) {
       in_offset_[j] += indices_ptr[j * idx_stride + k] * in_stride.at(k);
diff --git a/mindspore/lite/test/models_mindspore.cfg b/mindspore/lite/test/models_mindspore.cfg
index d10996b213..889f9a190a 100644
--- a/mindspore/lite/test/models_mindspore.cfg
+++ b/mindspore/lite/test/models_mindspore.cfg
@@ -3,6 +3,8 @@ mobilenetv2_438.mindir 1.5
 gate_u_net_small-1_110.mindir 1.5
 shufflenetv2.mindir 1.5
 #inceptionv3.mindir 1.5
+cyclegan_AtoB.mindir 0.5
+cyclegan_BtoA.mindir 0.5
 googlenet.mindir 1.5
 retinaface_732_1280_iod.mindir 1.5
 mobilefacenet_iod.mindir 1.5
@@ -16,6 +18,7 @@ mindspore_ghost-pets-8244.mindir 1.5
 mindspore_ghostnet600M-pets.mindir 1.5
 mindspore_ghostnet_1x_pets_int8.mindir 12
 mindspore_deeplab_v3_s16.mindir 6.5
+CenterNet_MultiPose_ascend.mindir 0.5
 googlenet_1202.mindir 0.5
 inceptionv3_1203.mindir 0.5
 mobilenetv2_gpu.mindir 0.5
diff --git a/mindspore/lite/test/models_onnx.cfg b/mindspore/lite/test/models_onnx.cfg
index b612ac93ab..d70b4acb40 100644
--- a/mindspore/lite/test/models_onnx.cfg
+++ b/mindspore/lite/test/models_onnx.cfg
@@ -50,4 +50,4 @@ hdc_Face_Landmark5_MTI_Aesthetic.onnx
 hdc_Image_Aesthetic_MTI_Aesthetic.onnx
 hdc_mobilenet_1w_class.onnx
 hdc_resnet_1w_class.onnx
-ml_video_edit_imitate_filter.onnx
+#ml_video_edit_imitate_filter.onnx
diff --git a/mindspore/lite/test/run_net_train.sh b/mindspore/lite/test/run_net_train.sh
index 1aaf25efd1..641531b604 100755
--- a/mindspore/lite/test/run_net_train.sh
+++ b/mindspore/lite/test/run_net_train.sh
@@ -81,14 +81,13 @@ function Run_x86() {
         echo ${model_name}'_train' >> "${run_x86_log_file}"
         echo 'cd '${x86_path}'/mindspore-lite-'${version}'-train-linux-x64' >> "${run_x86_log_file}"
         cd ${x86_path}/mindspore-lite-${version}-train-linux-x64 || return 1
-        echo 'LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib ./benchmark_train/benchmark_train --epochs='${epoch_num}' --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile='${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin' --expectedDataFile='${train_io_path}'/'${model_name}'_output --exportFile='${ms_models_path}'/'${model_name}'_train_exported.ms' >> "${run_x86_log_file}"
+        echo 'LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib ./benchmark_train/benchmark_train --epochs='${epoch_num}' --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile='${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin' --expectedDataFile='${train_io_path}'/'${model_name}'_output' >> "${run_x86_log_file}"
         echo '-------------------------------------------------------------------------------' >> "${run_x86_log_file}"
         LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib:./minddata/lib:./minddata/third_party/libjpeg-turbo/lib \
         ${run_valgrind}./benchmark_train/benchmark_train \
         --modelFile=${ms_models_path}/${model_name}_train.ms \
         --inDataFile=${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin \
-        --expectedDataFile=${train_io_path}/${model_name}_output \
-        --exportFile=${ms_models_path}/${model_name}_train_exported.ms >> "${run_x86_log_file}" \
+        --expectedDataFile=${train_io_path}/${model_name}_output >> "${run_x86_log_file}" \
         --epochs=${epoch_num} --numThreads=${threads}
         if [ $? = 0 ]; then
             run_result='x86: '${model_name}'_train pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
@@ -183,7 +182,6 @@ function Run_arm() {
         --modelFile=${model_name}_train.ms \
         --inDataFile=${tmp_dir}/${model_name}_input1.bin,${tmp_dir}/${model_name}_input2.bin \
         --expectedDataFile=${tmp_dir}/${model_name}_output \
-        --exportFile=${tmp_dir}/${model_name}_train_exported.ms \
         --numThreads=${threads}
 ENDM
 )
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
index dbe3452f79..16465a2bbe 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_transform_fusion.cc
@@ -164,7 +164,31 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_INVALID_OP_ATTR);
     return;
   }
-  CalNewWeightTensor(conv_node, weight_tensor, kernel_num, trans_scale);
+  auto temp_weight_data = new (std::nothrow) float[weight_tensor->tensor_shape_size()];
+  if (temp_weight_data == nullptr) {
+    MS_LOG(ERROR) << "new ParamValueLite failed";
+    lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_ERROR);
+    return;
+  }
+  auto new_weight_tensor = std::make_shared<ParamValueLite>();
+  if (new_weight_tensor == nullptr) {
+    delete temp_weight_data;
+    MS_LOG(ERROR) << "new ParamValueLite failed";
+    return;
+  }
+  new_weight_tensor->set_tensor_size(weight_tensor->tensor_size());
+  new_weight_tensor->set_tensor_shape(weight_tensor->tensor_shape());
+  new_weight_tensor->set_tensor_type(weight_tensor->tensor_type());
+  new_weight_tensor->set_format(weight_tensor->format());
+  auto ret = memcpy_s(temp_weight_data, weight_tensor->tensor_shape_size() * sizeof(float),
+                      weight_tensor->tensor_addr(), weight_tensor->tensor_shape_size() * sizeof(float));
+  if (ret != EOK) {
+    delete temp_weight_data;
+    MS_LOG(ERROR) << "memcpy_s error:" << ret;
+    return;
+  }
+  new_weight_tensor->set_tensor_addr(temp_weight_data);
+  CalNewWeightTensor(conv_node, new_weight_tensor, kernel_num, trans_scale);
   float *bias_data = nullptr;
   // conv has bias,bias_flag true
   bool bias_flag = false;
@@ -177,6 +201,7 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     bias_data = new (std::nothrow) float[kernel_num];
     if (bias_data == nullptr) {
       MS_LOG(ERROR) << "tensor_data is nullptr";
+      delete temp_weight_data;
       return;
     }
   }
@@ -186,6 +211,16 @@ void ConvTransformFusion::GenNewConvTensor(const FuncGraphPtr &func_graph, const
     bias_node->set_name(conv_node->fullname_with_scope() + "_bias");
     conv_node->add_input(bias_node);
   }
+  auto new_weight_paramter = func_graph->add_parameter();
+  if (new_weight_paramter == nullptr) {
+    MS_LOG(ERROR) << "new_weight_paramter is nullptr";
+    delete temp_weight_data;
+    return;
+  }
+  new_weight_paramter->set_default_param(new_weight_tensor);
+  new_weight_paramter->set_abstract(conv_weight_node->abstract());
+  new_weight_paramter->set_name(conv_node->fullname_with_scope() + conv_weight_node->fullname_with_scope());
+  conv_node->set_input(kConvWeightIndex, new_weight_paramter);
 }
 
 void ConvTransformFusion::CalNewWeightTensor(const CNodePtr &conv_node, const ParamValueLitePtr &weight_tensor,
                                              int kernel_num, const float *trans_scale) const {
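The fusion fix above stops the pass from scaling the shared weight parameter in place: it clones the ParamValueLite, lets CalNewWeightTensor scale the clone, and rebinds the conv's weight input to a fresh parameter, so other users of the original parameter keep seeing unscaled data. A plain-C sketch of the copy-before-scale idea, assuming an output-channel-major weight layout (the real layout handling lives inside CalNewWeightTensor):

#include <stdlib.h>
#include <string.h>

/* Returns a scaled private copy of the weights; the input buffer is untouched. */
static float *scaled_weight_copy(const float *weight, size_t count,
                                 size_t kernel_num, const float *trans_scale) {
  float *copy = (float *)malloc(count * sizeof(float));
  if (copy == NULL) {
    return NULL;
  }
  memcpy(copy, weight, count * sizeof(float)); /* never scale in place */
  size_t per_kernel = count / kernel_num;
  for (size_t k = 0; k < kernel_num; ++k) {
    for (size_t i = 0; i < per_kernel; ++i) {
      copy[k * per_kernel + i] *= trans_scale[k];
    }
  }
  return copy; /* caller rebinds this as the conv's new weight input */
}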
diff --git a/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.cc b/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.cc
index 4db6760829..881a516186 100644
--- a/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.cc
@@ -27,6 +27,44 @@ using mindspore::lite::PrimitiveC;
 namespace mindspore {
 namespace opt {
+int MindirAdjustPass::ValueNodeInt64Convert(AnfNodePtr anf_node) {
+  if (!utils::isa<ValueNodePtr>(anf_node)) {
+    return lite::RET_NO_CHANGE;
+  }
+  auto valueNode = anf_node->cast<ValueNodePtr>();
+  if (valueNode->abstract() == nullptr) {
+    return lite::RET_NO_CHANGE;
+  }
+  auto abstractTensor = utils::cast<abstract::AbstractTensorPtr>(valueNode->abstract());
+  if (abstractTensor == nullptr) {
+    return lite::RET_NO_CHANGE;
+  }
+  auto value = abstractTensor->GetValueTrack();
+  if (value != nullptr && value->isa<tensor::Tensor>()) {
+    if (abstractTensor->element() == nullptr) {
+      MS_LOG(ERROR) << "abstractTensor->element() is nullptr.";
+      return RET_ERROR;
+    }
+    auto typePtr = abstractTensor->element()->GetTypeTrack();
+    if (typePtr->type_id() == kNumberTypeInt64) {
+      auto shape_vector = utils::cast<abstract::ShapePtr>(abstractTensor->BuildShape())->shape();
+      auto dest_tensor_info = std::make_shared<tensor::Tensor>(kNumberTypeInt32, shape_vector);
+      auto *dest_data_buf = reinterpret_cast<int32_t *>(dest_tensor_info->data_c());
+      auto src_tensor_info = value->cast<tensor::TensorPtr>();
+      auto *src_data_buf = reinterpret_cast<int64_t *>(src_tensor_info->data_c());
+      MS_ASSERT(dest_tensor_info->ElementsNum() == src_tensor_info->ElementsNum());
+      for (int i = 0; i < dest_tensor_info->ElementsNum(); i++) {
+        dest_data_buf[i] = src_data_buf[i];
+      }
+      abstractTensor->set_value(dest_tensor_info);
+      abstractTensor->set_type(TypeIdToType(kNumberTypeInt32));
+      abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt32));
+      valueNode->set_value(dest_tensor_info);
+    }
+  }
+  return lite::RET_NO_CHANGE;
+}
+
 int MindirAdjustPass::ParameterNodeConvert(AnfNodePtr anf_node) {
   if (!utils::isa<ParameterPtr>(anf_node)) {
     MS_LOG(INFO) << "only parameter node need to convert tensor.";
@@ -139,7 +177,10 @@ bool MindirAdjustPass::Run(const FuncGraphPtr &graph) {
       status = ParameterNodeConvert(node);
     } else if (utils::isa<CNodePtr>(node)) {
       status = PrimitiveConvert(node);
+    } else if (utils::isa<ValueNodePtr>(node)) {
+      status = ValueNodeInt64Convert(node);
     }
+
     if (status != lite::RET_OK && status != lite::RET_NO_CHANGE) {
       lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
       success_flag = false;
diff --git a/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.h b/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.h
index 77ac864dab..6a9383e90c 100644
--- a/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.h
+++ b/mindspore/lite/tools/optimizer/graph/mindir_adjust_pass.h
@@ -32,6 +32,7 @@ class MindirAdjustPass : public Pass {
   ~MindirAdjustPass() override = default;
   void SetQuantType(QuantType quant_type) { quant_type_ = quant_type; }
   void SetFmkType(FmkType fmk_type) { fmk_type_ = fmk_type; }
+  int ValueNodeInt64Convert(AnfNodePtr anf_node);
   int ParameterNodeConvert(AnfNodePtr anf_node);
   int PrimitiveConvert(AnfNodePtr anf_node);
   bool Run(const FuncGraphPtr &graph) override;
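The new ValueNodeInt64Convert pass above narrows int64 constant tensors to int32 so downstream lite kernels see a uniform index type, rewriting the node's abstract and value in place. A minimal sketch of just the element-wise narrowing, as a hypothetical helper:

#include <stdint.h>
#include <stdlib.h>

/* Copies an int64 tensor into a freshly allocated int32 buffer.
 * Assumes every value fits in 32 bits, exactly as the pass does. */
static int32_t *narrow_int64_tensor(const int64_t *src, size_t count) {
  int32_t *dst = (int32_t *)malloc(count * sizeof(int32_t));
  if (dst == NULL) {
    return NULL;
  }
  for (size_t i = 0; i < count; ++i) {
    dst[i] = (int32_t)src[i];
  }
  return dst;
}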