!8759 [MSLITE] conv 1x1 int8 parallel support by hw and oc

From: @ling_qiao_min Reviewed-by: Signed-off-by:
5 years ago · 25f75bd8ab
parent 85a020575a 438ded3baf
commit 25f75bd8ab
4 changed files with 197 additions and 59 deletions
--- a/mindspore/lite/nnacl/int8/conv_int8.c
+++ b/mindspore/lite/nnacl/int8/conv_int8.c
@@ -804,8 +804,8 @@ void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int
                    const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift,
                    int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int *filter_zp) {
  int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1;
-  matmul_func(packed_input, packed_weight, dst, row, col, deep4, col, input_sum, bias, left_shift, right_shift,
+  matmul_func(packed_input, packed_weight, dst, row, col, deep4, conv_param->output_channel_, input_sum, bias,
-              multiplier, conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
+              left_shift, right_shift, multiplier, conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
              conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0], is_per_oc,
              filter_zp);
  return;
--- a/mindspore/lite/nnacl/int8/matmul_int8.c
+++ b/mindspore/lite/nnacl/int8/matmul_int8.c
@@ -292,7 +292,7 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row
    for (int c = 0; c < col; c++) {
      int r4div = r / C4NUM, r4mod = r % C4NUM;
      int c16div = c / C16NUM, c16mod = c % C16NUM;
-      size_t ci = r * col + c;
+      size_t ci = r * stride + c;
      int32_t value = 0;
      for (int d = 0; d < deep_4; d++) {
        int d4div = d / C4NUM, d4mod = d % C4NUM;
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.cc
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
@@ -45,12 +45,17 @@ class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel {
  void FreeRunBuf();
 public:
-  int DoRun(int task_id);
+  int OcRun(int task_id);
  int HwRun(int task_id);
  int OcOptPre(int task_id);
 private:
-  int RunArm32(int task_id);
+  int RunArm32Oc(int task_id);
-  int RunArm64(int task_id);
+  int RunArm64Oc(int task_id);
-  int RunArm64Opt(int task_id);
+  int RunArm64OptOc(int task_id);
  int RunArm32Hw(int task_id);
  int RunArm64Hw(int task_id);
  int RunArm64OptHw(int task_id);
 private:
  void FreeResizeBuf();
@@ -71,9 +76,12 @@ class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel {
  int8_t *packed_input_ = nullptr;
  int8_t *input_ptr_ = nullptr;
  int8_t *output_ptr_ = nullptr;
-  size_t thread_count_ = 1;
+  size_t thread_count_hw_ = 1;
-  size_t thread_stride_ = 0;
+  size_t thread_stride_hw_ = 0;
  size_t thread_count_oc_ = 1;
  size_t thread_stride_oc_ = 0;
  bool pre_trans_input_ = false;
  bool parallel_by_oc_ = false;
  size_t input_sum_size_ = 0;
  MatMulParameter *matmul_param_ = nullptr;
  MATMUL_OPT_DP_FUNC matmul_func_ = nullptr;