Optimize depthwise convolution 3x3 for arm64 platform

pull/7640/head
zhanyuan 4 years ago
parent 55a277d1de
commit 4b810f5ee3

File diff suppressed because it is too large. Load Diff

@@ -208,8 +208,13 @@ void ConvDw3x3Int8Block(int8_t *output, const int8_t *buffer, const int16_t *wei
int32_t out_zp, int out_multiplier, int left_shift, int right_shift, int32_t acc_min,
int32_t acc_max, int stride) {
for (; start_c <= end_c - 8; start_c += 8) {
#ifdef ENABLE_ARM64
ConvDw3x3Int8Neon64(output, buffer, weight, bias, col_size, row_size, channel, output_h, output_w, in_zp, out_zp,
out_multiplier, left_shift, right_shift, acc_min, acc_max);
#else
ConvDw3x3Int8Window(output, buffer, weight, bias, col_size, row_size, channel, output_h, output_w, in_zp, out_zp,
out_multiplier, left_shift, right_shift, acc_min, acc_max, stride);
#endif
output += 8;
buffer += 8;
weight += 8;

@@ -43,6 +43,14 @@ void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *
// Declaration of the int8 depthwise deconvolution routine (definition is not visible here).
// output_data and output_buffer are non-const and may be written by the callee; input_data,
// weight_data, bias_data, conv_param and sliding are const-qualified and are only read.
// Note that input_data and weight_data are int16_t while output_data is int8_t.
// NOTE(review): output_buffer is presumably an int32 accumulation scratch area and task_id
// presumably selects this call's share of the work -- confirm both against the definition.
void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *input_data, const int16_t *weight_data,
const int32_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding,
int task_id);
#ifdef ENABLE_ARM64
// ARM64-only 3x3 int8 depthwise convolution kernel; the declaration exists only when
// ENABLE_ARM64 is defined, so every call site must sit behind the same guard.
// ConvDw3x3Int8Block calls this in place of ConvDw3x3Int8Window on ARM64 builds, once per
// loop iteration, advancing output/buffer/weight by 8 between calls.
//
// The parameter list matches ConvDw3x3Int8Window except that there is NO stride argument.
// NOTE(review): presumably the kernel is written for a single fixed stride -- confirm that the
// ARM64 path is never reached with a stride the implementation does not handle.
//
// output is the only non-const pointer; input, weight and bias are read-only.
// NOTE(review): in_zp/out_zp look like the input/output quantization zero points,
// out_multiplier/left_shift/right_shift the requantization parameters, and acc_min/acc_max the
// clamp bounds applied before the int8 store -- names only, verify against the implementation.
void ConvDw3x3Int8Neon64(int8_t *output, const int8_t *input, const int16_t *weight, const int32_t *bias,
int input_col_size, int input_row_size, int channel, int output_h, int output_w, int8_t in_zp,
int32_t out_zp, int out_multiplier, int left_shift, int right_shift, int32_t acc_min,
int32_t acc_max);
#endif
#ifdef __cplusplus
}
#endif

Loading…
Cancel
Save