!8759 [MSLITE] conv 1x1 int8 parallel support by hw and oc

From: @ling_qiao_min
Reviewed-by: 
Signed-off-by:
pull/8759/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 25f75bd8ab

@@ -804,8 +804,8 @@ void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int
const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift, const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift,
int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int *filter_zp) { int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int *filter_zp) {
int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1; int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1;
matmul_func(packed_input, packed_weight, dst, row, col, deep4, col, input_sum, bias, left_shift, right_shift, matmul_func(packed_input, packed_weight, dst, row, col, deep4, conv_param->output_channel_, input_sum, bias,
multiplier, conv_param->conv_quant_arg_.output_quant_args_[0].zp_, left_shift, right_shift, multiplier, conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0], is_per_oc, conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0], is_per_oc,
filter_zp); filter_zp);
return; return;

@@ -292,7 +292,7 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row
for (int c = 0; c < col; c++) { for (int c = 0; c < col; c++) {
int r4div = r / C4NUM, r4mod = r % C4NUM; int r4div = r / C4NUM, r4mod = r % C4NUM;
int c16div = c / C16NUM, c16mod = c % C16NUM; int c16div = c / C16NUM, c16mod = c % C16NUM;
size_t ci = r * col + c; size_t ci = r * stride + c;
int32_t value = 0; int32_t value = 0;
for (int d = 0; d < deep_4; d++) { for (int d = 0; d < deep_4; d++) {
int d4div = d / C4NUM, d4mod = d % C4NUM; int d4div = d / C4NUM, d4mod = d % C4NUM;

@@ -45,12 +45,17 @@ class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel {
void FreeRunBuf(); void FreeRunBuf();
public: public:
int DoRun(int task_id); int OcRun(int task_id);
int HwRun(int task_id);
int OcOptPre(int task_id);
private: private:
int RunArm32(int task_id); int RunArm32Oc(int task_id);
int RunArm64(int task_id); int RunArm64Oc(int task_id);
int RunArm64Opt(int task_id); int RunArm64OptOc(int task_id);
int RunArm32Hw(int task_id);
int RunArm64Hw(int task_id);
int RunArm64OptHw(int task_id);
private: private:
void FreeResizeBuf(); void FreeResizeBuf();
@@ -71,9 +76,12 @@ class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel {
int8_t *packed_input_ = nullptr; int8_t *packed_input_ = nullptr;
int8_t *input_ptr_ = nullptr; int8_t *input_ptr_ = nullptr;
int8_t *output_ptr_ = nullptr; int8_t *output_ptr_ = nullptr;
size_t thread_count_ = 1; size_t thread_count_hw_ = 1;
size_t thread_stride_ = 0; size_t thread_stride_hw_ = 0;
size_t thread_count_oc_ = 1;
size_t thread_stride_oc_ = 0;
bool pre_trans_input_ = false; bool pre_trans_input_ = false;
bool parallel_by_oc_ = false;
size_t input_sum_size_ = 0; size_t input_sum_size_ = 0;
MatMulParameter *matmul_param_ = nullptr; MatMulParameter *matmul_param_ = nullptr;
MATMUL_OPT_DP_FUNC matmul_func_ = nullptr; MATMUL_OPT_DP_FUNC matmul_func_ = nullptr;

Loading…
Cancel
Save