From afc27a3bb4d16fcd3e375ea1a33d6c04a5c08fcc Mon Sep 17 00:00:00 2001 From: yangruoqi713 Date: Tue, 10 Nov 2020 14:19:23 +0800 Subject: [PATCH] [MSLITE][Develop] optimize arm cpu int8 depthwise: init weight and bias before resize --- .../nnacl/assembly/arm32/DeconvDwInt8Center.S | 2 +- mindspore/lite/nnacl/fp32/conv_depthwise.c | 90 +++++++------ mindspore/lite/nnacl/fp32/conv_depthwise.h | 7 +- .../lite/nnacl/int8/conv_depthwise_int8.c | 121 ++++++++---------- .../lite/nnacl/int8/conv_depthwise_int8.h | 4 +- .../convolution_depthwise_slidewindow_fp32.cc | 2 +- .../arm/fp32/deconvolution_depthwise_fp32.cc | 2 +- .../int8/convolution_depthwise_3x3_int8.cc | 20 +-- .../arm/int8/convolution_depthwise_int8.cc | 18 +-- .../convolution_depthwise_slidewindow_int8.cc | 22 ++-- 10 files changed, 133 insertions(+), 155 deletions(-) diff --git a/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S b/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S index abae39e13a..02141904a5 100644 --- a/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S +++ b/mindspore/lite/nnacl/assembly/arm32/DeconvDwInt8Center.S @@ -1,4 +1,4 @@ -#ifdef __arm__ +#if 0 #ifndef __aarch64__ .text diff --git a/mindspore/lite/nnacl/fp32/conv_depthwise.c b/mindspore/lite/nnacl/fp32/conv_depthwise.c index af25401f55..5f86343d61 100644 --- a/mindspore/lite/nnacl/fp32/conv_depthwise.c +++ b/mindspore/lite/nnacl/fp32/conv_depthwise.c @@ -152,9 +152,8 @@ void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter * } /*conv depthwise fp32 begin*/ -#ifndef ENABLE_ARM64 -void DepthwiseBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width, - int in_kh_step, int in_kw_step, int kernel_w_step, bool is_relu, bool is_relu6) { +void ConvDwBorderPixel(float *dst, const float *src, const float *weight, const float *bias, int height, int width, + int in_kh_step, int in_kw_step, int kernel_w_step, bool is_relu, bool is_relu6) { const float *src_kh = src; const float *weight_kh = weight; for (int c = 0; c < C4NUM; c++) { @@ -179,10 +178,9 @@ void DepthwiseBorderPixel(float *dst, const float *src, const float *weight, con dst[c] = (is_relu6) ? (MSMIN(6, MSMAX(0, dst[c]))) : (dst[c]); } } -#endif -void DepthwiseBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, - int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { +void ConvDwBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, int left, + int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { bool relu = conv_param->act_type_ == ActType_Relu; bool relu6 = conv_param->act_type_ == ActType_Relu6; float *dst_h = dst + top * sliding->out_h_step_; @@ -207,8 +205,8 @@ void DepthwiseBorder(float *dst, const float *src, const float *weight, const fl sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float), conv_param->kernel_w_ * C4NUM * sizeof(float), relu, relu6); #else - DepthwiseBorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, - sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM, relu, relu6); + ConvDwBorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, + sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM, relu, relu6); #endif dst_kernel += sliding->block_channel_; } // width loop @@ -217,9 +215,9 @@ void DepthwiseBorder(float *dst, const float *src, const float *weight, const fl } #ifndef ENABLE_ARM64 -void DepthwiseCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, - int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, - int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) { +void ConvDwCenter(float *dst, const float *src, const float *weight, const float *bias, int height, int width, + int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, + int in_kh_step, int in_kw_step, bool is_relu, bool is_relu6) { float *dst_h = dst; const float *src_h = src; for (int oh = 0; oh < height; oh++) { @@ -260,7 +258,7 @@ void DepthwiseCenter(float *dst, const float *src, const float *weight, const fl #endif // conv depthwise fp32: sliding window -void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, +void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) { bool relu = conv_param->act_type_ == ActType_Relu; bool relu6 = conv_param->act_type_ == ActType_Relu6; @@ -272,14 +270,13 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig float *dst_data = dst + oc * C4NUM; const float *weight = weight_data + oc * sliding->kernel_step_; const float *bias = bias_data + oc * C4NUM; - DepthwiseBorder(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, - sliding); - DepthwiseBorder(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, - conv_param->output_w_, conv_param, sliding); - DepthwiseBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, - sliding); - DepthwiseBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, - conv_param->output_w_, conv_param, sliding); + ConvDwBorder(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, sliding); + ConvDwBorder(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, conv_param->output_w_, + conv_param, sliding); + ConvDwBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, + sliding); + ConvDwBorder(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, + conv_param->output_w_, conv_param, sliding); if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; @@ -293,10 +290,10 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float), relu, relu6); #else - DepthwiseCenter(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, - conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, - sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, relu, - relu6); + ConvDwCenter(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, + conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, + sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, relu, + relu6); #endif } } // output C4 loop @@ -308,8 +305,8 @@ void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weig /*conv depthwise fp32 end*/ /*deconv depthwise fp32 begin*/ -void DeconvDepthwiseBorderPixel(float *dst, const float *src, const float *weight, int height, int width, - int in_kh_step, int in_kw_step, int kernel_w_step) { +void DeconvDwBorderPixel(float *dst, const float *src, const float *weight, int height, int width, int in_kh_step, + int in_kw_step, int kernel_w_step) { float *dst_kh = dst; const float *weight_kh = weight; for (int kh = 0; kh < height; kh++) { @@ -335,8 +332,8 @@ void DeconvDepthwiseBorderPixel(float *dst, const float *src, const float *weigh } // kernel_h loop } -void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, int top, int bottom, int left, int right, - const ConvParameter *conv_param, const SlidingWindowParam *sliding) { +void DeconvDwBorder(float *dst, const float *src, const float *weight, int top, int bottom, int left, int right, + const ConvParameter *conv_param, const SlidingWindowParam *sliding) { const float *src_h = src + top * sliding->out_h_step_; for (int ih = top; ih < bottom; ih++) { int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; @@ -358,8 +355,8 @@ void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, in sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float), conv_param->kernel_w_ * C4NUM * sizeof(float)); #else - DeconvDepthwiseBorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, - sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM); + DeconvDwBorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, + sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_ * C4NUM); #endif src_kernel += sliding->block_channel_; } // width loop @@ -368,9 +365,9 @@ void DeconvDepthwiseBorder(float *dst, const float *src, const float *weight, in } #ifndef ENABLE_ARM64 -void DeconvDepthwiseCenter(float *dst, const float *src, const float *weight, int height, int width, int kernel_h, - int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, - int in_kh_step, int in_kw_step) { +void DeconvDwCenter(float *dst, const float *src, const float *weight, int height, int width, int kernel_h, + int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, int in_kh_step, + int in_kw_step) { float *dst_h = dst; const float *src_h = src; for (int oh = 0; oh < height; oh++) { @@ -401,7 +398,7 @@ void DeconvDepthwiseCenter(float *dst, const float *src, const float *weight, in } #endif -void DeconvDepthwisePostFunc(float *dst, const float *bias, int block_channel, const ConvParameter *conv_param) { +void DeconvDwPost(float *dst, const float *bias, int block_channel, const ConvParameter *conv_param) { bool relu = conv_param->act_type_ == ActType_Relu; bool relu6 = conv_param->act_type_ == ActType_Relu6; float *dst_k = dst; @@ -416,7 +413,7 @@ void DeconvDepthwisePostFunc(float *dst, const float *bias, int block_channel, c } // deconv depthwise fp32: sliding window -void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, +void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) { const float *src = input_data; float *dst = output_data; @@ -426,13 +423,13 @@ void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *we float *dst_data = dst + oc * C4NUM; const float *weight = weight_data + oc * sliding->kernel_step_; const float *bias = bias_data + oc * C4NUM; - DeconvDepthwiseBorder(dst_data, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, sliding); - DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, conv_param->input_w_, - conv_param, sliding); - DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, - sliding); - DeconvDepthwiseBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, - conv_param->input_w_, conv_param, sliding); + DeconvDwBorder(dst_data, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, sliding); + DeconvDwBorder(dst_data, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, conv_param->input_w_, + conv_param, sliding); + DeconvDwBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, + sliding); + DeconvDwBorder(dst_data, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, conv_param->input_w_, + conv_param, sliding); if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; @@ -447,13 +444,12 @@ void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *we sliding->in_sw_step_ * sizeof(float), sliding->in_kh_step_ * sizeof(float), sliding->in_kw_step_ * sizeof(float)); #else - DeconvDepthwiseCenter(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, - conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, - sliding->block_channel_, sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, - sliding->in_kw_step_); + DeconvDwCenter(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, + conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, + sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_); #endif } - DeconvDepthwisePostFunc(dst_data, bias, sliding->block_channel_, conv_param); + DeconvDwPost(dst_data, bias, sliding->block_channel_, conv_param); } // output C4 loop src += sliding->out_step_; dst += sliding->in_step_; diff --git a/mindspore/lite/nnacl/fp32/conv_depthwise.h b/mindspore/lite/nnacl/fp32/conv_depthwise.h index 4edf2105fb..92da05f7c3 100644 --- a/mindspore/lite/nnacl/fp32/conv_depthwise.h +++ b/mindspore/lite/nnacl/fp32/conv_depthwise.h @@ -42,13 +42,10 @@ void InitSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *co void AppendSlidingParamConvDw(SlidingWindowParam *sliding, const ConvParameter *conv_param, int block); -void DepthwiseBorder(float *dst, const float *src, const float *weight, const float *bias, int top, int bottom, - int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding); - -void ConvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, +void ConvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); -void DeconvDwC4Fp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, +void DeconvDwSWFp32(float *output_data, const float *input_data, const float *weight_data, const float *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); #ifdef __cplusplus diff --git a/mindspore/lite/nnacl/int8/conv_depthwise_int8.c b/mindspore/lite/nnacl/int8/conv_depthwise_int8.c index c13eea8f38..4f96c630b4 100644 --- a/mindspore/lite/nnacl/int8/conv_depthwise_int8.c +++ b/mindspore/lite/nnacl/int8/conv_depthwise_int8.c @@ -139,7 +139,7 @@ void ConvDwInt8(int8_t *output_data, int32_t *row_buffer, const int8_t *input_da /*conv depthwise int8 end*/ /*conv depthwise 3x3 int8 begin*/ -bool CheckIfUse3X3(const ConvParameter *conv_param) { +bool CheckConvDwInt8Use3X3(const ConvParameter *conv_param) { bool use_3x3 = conv_param->kernel_h_ == 3 && conv_param->kernel_w_ == 3 && (conv_param->stride_h_ == 1 || conv_param->stride_h_ == 2) && (conv_param->stride_w_ == 1 || conv_param->stride_w_ == 2) && @@ -158,8 +158,8 @@ bool CheckIfUse3X3(const ConvParameter *conv_param) { return use_3x3; } -void InitInputBuffer(int8_t *buffer, const int8_t *input, const ConvParameter *conv_param, int block_input_h, - int block_input_w) { +void ConvDw3x3Int8InitBuffer(int8_t *buffer, const int8_t *input, const ConvParameter *conv_param, int block_input_h, + int block_input_w) { for (int h = 0; h < block_input_h; h++) { const int8_t *src = input; for (int w = 0; w < block_input_w; w++) { @@ -257,7 +257,7 @@ void ConvDw3x3Int8Row(int8_t *output, int8_t *buffer, const int8_t *input, const const int32_t *bias_ptr = bias; int c = 0; for (; c <= conv_param->output_channel_ - 64; c += 64) { - InitInputBuffer(buffer, input_ptr, conv_param, block_input_h, block_input_w); + ConvDw3x3Int8InitBuffer(buffer, input_ptr, conv_param, block_input_h, block_input_w); ConvDw3x3Int8Block(output_ptr, buffer, weight_ptr, bias_ptr, 0, 64, 64, ih_offset, conv_param->input_channel_, block_output_h, block_output_w, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max, conv_param->stride_h_); @@ -489,10 +489,10 @@ void ConvDw3x3Int8Pad(int8_t *output_data, const int8_t *input_data, const int16 /*conv depthwise 3x3 int8 end*/ /*conv depthwise sliding window perchannel int8 begin*/ -void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, - int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, - int32_t *out_zp, const int *out_multiplier, const int *left_shift, const int *right_shift, - int32_t *acc_min, int32_t *acc_max) { +void ConvDwInt8BorderPixel(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, + int width, int in_kh_step, int in_kw_step, int kernel_w, int8_t *input_zp, int32_t *out_zp, + const int *out_multiplier, const int *left_shift, const int *right_shift, int32_t *acc_min, + int32_t *acc_max) { int tmp_buffer[C8NUM]; for (int i = 0; i < C8NUM; i++) { tmp_buffer[i] = 0; @@ -525,10 +525,10 @@ void DepthwiseBorderPixelInt8(int8_t *dst, const int8_t *src, const int16_t *wei } } -void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, - int bottom, int left, int right, const ConvParameter *conv_param, - const SlidingWindowParam *sliding, int8_t *in_zp, int32_t *out_zp, const int *out_multiplier, - const int *left_shift, const int *right_shift, int32_t *acc_min, int32_t *acc_max) { +void ConvDwInt8Border(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int top, int bottom, + int left, int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding, + int8_t *in_zp, int32_t *out_zp, const int *out_multiplier, const int *left_shift, + const int *right_shift, int32_t *acc_min, int32_t *acc_max) { int8_t *dst_h = dst + top * sliding->out_h_step_; for (int oh = top; oh < bottom; oh++) { int ih = oh * conv_param->stride_h_ - conv_param->pad_u_; @@ -546,9 +546,9 @@ void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int8_t *src_kernel = src_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_; const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C8NUM; - DepthwiseBorderPixelInt8(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, - sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_, in_zp, out_zp, - out_multiplier, left_shift, right_shift, acc_min, acc_max); + ConvDwInt8BorderPixel(dst_kernel, src_kernel, weight_kernel, bias, end_kh - start_kh, end_kw - start_kw, + sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_, in_zp, out_zp, + out_multiplier, left_shift, right_shift, acc_min, acc_max); dst_kernel += sliding->block_channel_; } // width loop @@ -556,12 +556,11 @@ void DepthwiseBorderInt8(int8_t *dst, const int8_t *src, const int16_t *weight, } // height loop } -#ifndef ENABLE_ARM64 -void DepthwiseCenterInt8(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, - int width, int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, - int in_sw_step, int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp, - int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift, int32_t *acc_min, - int32_t *acc_max) { +#ifndef ENABLE_ARM +void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, int height, int width, + int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, + int in_kh_step, int in_kw_step, int8_t *in_zp, int32_t *out_zp, int32_t *out_multiplier, + int32_t *left_shift, int32_t *right_shift, int32_t *acc_min, int32_t *acc_max) { int tmp_buffer[C8NUM]; int8_t *dst_h = dst; const int8_t *src_h = src; @@ -608,7 +607,7 @@ void DepthwiseCenterInt8(int8_t *dst, const int8_t *src, const int16_t *weight, } #endif -void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, +void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id) { const int8_t *src = input_data; @@ -628,37 +627,26 @@ void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t * int8_t *in_zp = input_zp + oc * C8NUM; int32_t *out_zp = output_zp + oc * C8NUM; - DepthwiseBorderInt8(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, - sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); - DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, - conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, - right_shift, acc_min, acc_max); - DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, - conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, - acc_max); - DepthwiseBorderInt8(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, - conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, - right_shift, acc_min, acc_max); + ConvDwInt8Border(dst_data, src_data, weight, bias, 0, sliding->top_, 0, conv_param->output_w_, conv_param, + sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); + ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->bottom_, conv_param->output_h_, 0, + conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, + right_shift, acc_min, acc_max); + ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, 0, sliding->left_, conv_param, + sliding, in_zp, out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); + ConvDwInt8Border(dst_data, src_data, weight, bias, sliding->top_, sliding->bottom_, sliding->right_, + conv_param->output_w_, conv_param, sliding, in_zp, out_zp, out_multiplier, left_shift, + right_shift, acc_min, acc_max); if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { int in_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; int in_w_start = sliding->left_ * conv_param->stride_w_ - conv_param->pad_l_; const int8_t *in_t = src_data + in_h_start * sliding->in_h_step_ + in_w_start * sliding->block_channel_; int8_t *out_t = dst_data + sliding->top_ * sliding->out_h_step_ + sliding->left_ * sliding->block_channel_; -#ifdef ENABLE_ARM ConvDwInt8Center(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, - conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_ * sizeof(int8_t), - sliding->block_channel_ * sizeof(int8_t), sliding->in_sh_step_ * sizeof(int8_t), - sliding->in_sw_step_ * sizeof(int8_t), sliding->in_kh_step_ * sizeof(int8_t), - sliding->in_kw_step_ * sizeof(int8_t), in_zp, out_zp, out_multiplier, left_shift, right_shift, - acc_min, acc_max); -#else - DepthwiseCenterInt8(out_t, in_t, weight, bias, sliding->bottom_ - sliding->top_, - sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_, - sliding->out_h_step_, sliding->block_channel_, sliding->in_sh_step_, sliding->in_sw_step_, - sliding->in_kh_step_, sliding->in_kw_step_, in_zp, out_zp, out_multiplier, left_shift, - right_shift, acc_min, acc_max); -#endif + conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, + sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_, in_zp, + out_zp, out_multiplier, left_shift, right_shift, acc_min, acc_max); } } // output C8 loop src += sliding->in_step_; @@ -669,8 +657,8 @@ void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t * /*conv depthwise sliding window perchannel int8 end*/ /*deconv depthwise int8 begin*/ -void DeconvDepthwiseBorderPixelInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, - int in_kh_step, int in_kw_step, int kernel_w) { +void DeconvDwInt8BorderPixel(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, + int in_kh_step, int in_kw_step, int kernel_w) { int32_t *dst_kh = dst; const int16_t *weight_kh = weight; for (int kh = 0; kh < height; kh++) { @@ -688,8 +676,8 @@ void DeconvDepthwiseBorderPixelInt8(int32_t *dst, const int16_t *src, const int1 } // kernel_h loop } -void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int top, int bottom, int left, - int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { +void DeconvDwInt8Border(int32_t *dst, const int16_t *src, const int16_t *weight, int top, int bottom, int left, + int right, const ConvParameter *conv_param, const SlidingWindowParam *sliding) { const int16_t *src_h = src + top * sliding->out_h_step_; for (int ih = top; ih < bottom; ih++) { int oh = ih * conv_param->stride_h_ - conv_param->pad_u_; @@ -707,8 +695,8 @@ void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t * const int16_t *weight_kernel = weight + (start_kh * conv_param->kernel_w_ + start_kw) * C4NUM; int32_t *dst_kernel = dst_w + start_kh * sliding->in_kh_step_ + start_kw * sliding->in_kw_step_; - DeconvDepthwiseBorderPixelInt8(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, - sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_); + DeconvDwInt8BorderPixel(dst_kernel, src_kernel, weight_kernel, end_kh - start_kh, end_kw - start_kw, + sliding->in_kh_step_, sliding->in_kw_step_, conv_param->kernel_w_); src_kernel += sliding->block_channel_; } // width loop src_h += sliding->out_h_step_; @@ -716,9 +704,9 @@ void DeconvDepthwiseBorderInt8(int32_t *dst, const int16_t *src, const int16_t * } #ifndef ENABLE_ARM64 -void DeconvDepthwiseCenterInt8(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, - int kernel_h, int kernel_w, int out_h_step, int block_channel, int in_sh_step, - int in_sw_step, int in_kh_step, int in_kw_step) { +void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, int height, int width, int kernel_h, + int kernel_w, int out_h_step, int block_channel, int in_sh_step, int in_sw_step, int in_kh_step, + int in_kw_step) { int32_t *dst_h = dst; const int16_t *src_h = src; for (int oh = 0; oh < height; oh++) { @@ -784,14 +772,14 @@ void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *in const int16_t *weight = weight_data + oc * sliding->kernel_step_; const int32_t *bias = bias_data + oc * C4NUM; int8_t *dst_data = dst + oc * C4NUM; - DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, - sliding); - DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, - conv_param->input_w_, conv_param, sliding); - DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, - conv_param, sliding); - DeconvDepthwiseBorderInt8(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, - conv_param->input_w_, conv_param, sliding); + DeconvDwInt8Border(output_buffer, src_data, weight, 0, sliding->top_, 0, conv_param->input_w_, conv_param, + sliding); + DeconvDwInt8Border(output_buffer, src_data, weight, sliding->bottom_, conv_param->input_h_, 0, + conv_param->input_w_, conv_param, sliding); + DeconvDwInt8Border(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, 0, sliding->left_, + conv_param, sliding); + DeconvDwInt8Border(output_buffer, src_data, weight, sliding->top_, sliding->bottom_, sliding->right_, + conv_param->input_w_, conv_param, sliding); if (sliding->right_ > sliding->left_ && sliding->bottom_ > sliding->top_) { int oh_h_start = sliding->top_ * conv_param->stride_h_ - conv_param->pad_u_; @@ -806,10 +794,9 @@ void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *in sliding->in_sw_step_ * sizeof(int32_t), sliding->in_kh_step_ * sizeof(int32_t), sliding->in_kw_step_ * sizeof(int32_t)); #else - DeconvDepthwiseCenterInt8(out_t, in_t, weight, sliding->bottom_ - sliding->top_, - sliding->right_ - sliding->left_, conv_param->kernel_h_, conv_param->kernel_w_, - sliding->out_h_step_, sliding->block_channel_, sliding->in_sh_step_, - sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_); + DeconvDwInt8Center(out_t, in_t, weight, sliding->bottom_ - sliding->top_, sliding->right_ - sliding->left_, + conv_param->kernel_h_, conv_param->kernel_w_, sliding->out_h_step_, sliding->block_channel_, + sliding->in_sh_step_, sliding->in_sw_step_, sliding->in_kh_step_, sliding->in_kw_step_); #endif } DeconvDwInt8Post(dst_data, output_buffer, bias, sliding->block_channel_, diff --git a/mindspore/lite/nnacl/int8/conv_depthwise_int8.h b/mindspore/lite/nnacl/int8/conv_depthwise_int8.h index ae132861c6..4c1263b2a3 100644 --- a/mindspore/lite/nnacl/int8/conv_depthwise_int8.h +++ b/mindspore/lite/nnacl/int8/conv_depthwise_int8.h @@ -24,7 +24,7 @@ extern "C" { #endif -bool CheckIfUse3X3(const ConvParameter *conv_param); +bool CheckConvDwInt8Use3X3(const ConvParameter *conv_param); void ConvDwInt8(int8_t *output_data, int32_t *output_row, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, const ConvParameter *conv_param, int task_id); @@ -36,7 +36,7 @@ void ConvDw3x3Int8(int8_t *output_data, int8_t *buffer, const int8_t *input_data const int32_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); -void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, +void ConvDwInt8SW(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data, int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc index 2ee4f70407..e15d1a5502 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc @@ -118,7 +118,7 @@ int ConvolutionDepthwiseSWCPUKernel::ReSize() { } int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) { - ConvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, + ConvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc index 3a129e1379..c317b1118b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc @@ -130,7 +130,7 @@ int DeconvolutionDepthwiseCPUKernel::ReSize() { } int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) { - DeconvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, + DeconvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc index 44a74eeecc..2774f54c12 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc @@ -99,26 +99,26 @@ int ConvolutionDepthwise3x3Int8CPUKernel::Init() { MS_LOG(ERROR) << "new sliding window param."; return RET_ERROR; } - if (!InferShapeDone()) { - return RET_OK; - } - return ReSize(); -} - -int ConvolutionDepthwise3x3Int8CPUKernel::ReSize() { - ConvolutionBaseCPUKernel::Init(); - InitSlidingParamConvDw(sliding_, conv_param_, conv_param_->input_channel_); auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); if (ret != RET_OK) { MS_LOG(ERROR) << "Set quant param failed."; return ret; } - conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_); ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; return ret; } + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int ConvolutionDepthwise3x3Int8CPUKernel::ReSize() { + ConvolutionBaseCPUKernel::Init(); + InitSlidingParamConvDw(sliding_, conv_param_, conv_param_->input_channel_); + conv_param_->thread_num_ = MSMIN(thread_count_, conv_param_->output_h_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc index 6b7f469c03..ccfb3d6f77 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc @@ -90,14 +90,6 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { } int ConvolutionDepthwiseInt8CPUKernel::Init() { - if (!InferShapeDone()) { - return RET_OK; - } - return ReSize(); -} - -int ConvolutionDepthwiseInt8CPUKernel::ReSize() { - ConvolutionBaseCPUKernel::Init(); auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); if (ret != RET_OK) { MS_LOG(ERROR) << "Set quant param failed."; @@ -109,6 +101,14 @@ int ConvolutionDepthwiseInt8CPUKernel::ReSize() { MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; return ret; } + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int ConvolutionDepthwiseInt8CPUKernel::ReSize() { + ConvolutionBaseCPUKernel::Init(); return RET_OK; } @@ -181,7 +181,7 @@ kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector conv_param->output_w_ = outputs[kOutputIndex]->Width(); } auto weight_quant_size = inputs[kWeightIndex]->GetQuantParams().size(); - if (CheckIfUse3X3(conv_param) && weight_quant_size == 1) { + if (CheckConvDwInt8Use3X3(conv_param) && weight_quant_size == 1) { #ifdef ENABLE_ARM64 kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3Int8CPUKernel(opParameter, inputs, outputs, ctx, primitive); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc index d625bf619d..e562a053ef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc @@ -275,16 +275,6 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Init() { MS_LOG(ERROR) << "new sliding window param."; return RET_ERROR; } - if (!InferShapeDone()) { - return RET_OK; - } - return ReSize(); -} - -int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() { - ConvolutionBaseCPUKernel::Init(); - InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); - auto ret = ConvolutionBaseCPUKernel::SetQuantParam(); if (ret != RET_OK) { MS_LOG(ERROR) << "Set quant param failed."; @@ -295,17 +285,25 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() { MS_LOG(ERROR) << "reinit quant param failed."; return ret; } - ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Depthwise int8 InitWeightBias error!"; return ret; } + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int ConvolutionDepthwiseSWInt8CPUKernel::ReSize() { + ConvolutionBaseCPUKernel::Init(); + InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); return RET_OK; } int ConvolutionDepthwiseSWInt8CPUKernel::Execute(int task_id) { - ConvDwSWInt8(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), input_zp_, + ConvDwInt8SW(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), input_zp_, output_zp_, conv_param_, sliding_, task_id); return RET_OK; }