[MSLITE][Develop] Support per-channel activation quantization in ARM CPU int8 depthwise convolution

pull/6038/head
yangruoqi713 5 years ago
parent 9ca16d3c6c
commit 7175e1921e

File diff suppressed because it is too large Load Diff

@ -45,10 +45,11 @@ void IndirectGemmInt8_4x4(int8_t *output, const int8_t *input, const int8_t *wei
void DeconvDwInt8Center(int32_t *dst, const int16_t *src, const int16_t *weight, size_t height, size_t width,
size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel, size_t in_sh_step,
size_t in_sw_step, size_t in_kh_step, size_t in_kw_step);
void ConvDwInt8Center(int8_t *dst, const int16_t *src, const int16_t *weight, const int32_t *bias, size_t height,
void ConvDwInt8Center(int8_t *dst, const int8_t *src, const int16_t *weight, const int32_t *bias, size_t height,
size_t width, size_t kernel_h, size_t kernel_w, size_t out_h_step, size_t block_channel,
size_t in_sh_step, size_t in_sw_step, size_t in_kh_step, size_t in_kw_step, int out_multiplier,
int left_shift, int right_shift, int32_t out_zp, int32_t acc_min, int32_t acc_max);
size_t in_sh_step, size_t in_sw_step, size_t in_kh_step, size_t in_kw_step, int8_t *in_zp,
int32_t *out_zp, int32_t *out_multiplier, int32_t *left_shift, int32_t *right_shift,
int32_t *acc_min, int32_t *acc_max);
void ConvDwInt8Row(int32_t *output_ptr, const int8_t *input_ptr, const int16_t *weight_ptr, int num_pixels,
int output_channel, int input_step, int8_t input_zp);
void ConvDwInt8PostAlign4(int8_t *dst, int32_t *buffer, int num_pixels, int32_t output_zp, int32_t out_multiplier,

File diff suppressed because it is too large Load Diff

@ -27,8 +27,9 @@ extern "C" {
void ConvDwInt8(int8_t *output_data, int32_t *output_row, const int8_t *input_data, const int16_t *weight_data,
const int32_t *bias_data, const ConvParameter *conv_param, int task_id);
void ConvDwSWInt8(int8_t *output_data, const int16_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
const ConvParameter *conv_param, const SlidingWindowParam *sliding, int task_id);
void ConvDwSWInt8(int8_t *output_data, const int8_t *input_data, const int16_t *weight_data, const int32_t *bias_data,
int8_t *input_zp, int32_t *output_zp, const ConvParameter *conv_param,
const SlidingWindowParam *sliding, int task_id);
void DeconvDwInt8(int8_t *output_data, int32_t *output_buffer, const int16_t *input_data, const int16_t *weight_data,
const int32_t *bias_data, const ConvParameter *conv_param, const SlidingWindowParam *sliding,

@ -869,6 +869,45 @@ void PackNHWC4ToNHWCInt8(const void *src, void *dst, int batch, int plane, int c
}
}
// Packs an NHWC int8 tensor into NHWC8 layout, i.e. the channel dimension is
// grouped into blocks of C8NUM (the last block is the channel tail).
// When channel is already a multiple of C8NUM the two layouts are identical,
// so a single bulk memcpy is used.
// NOTE(review): when padding is needed, the (c8 * C8NUM - channel) tail bytes
// of each pixel in dst are left untouched here — presumably callers zero dst
// beforehand; confirm against call sites.
// Fix vs. original: casts of `src` no longer discard const.
void PackNHWCToNHWC8Int8(const void *src, void *dst, int batch, int plane, int channel) {
  int c8 = UP_DIV(channel, C8NUM);                  // number of 8-channel blocks per pixel
  int nhwc8_batch_unit_offset = c8 * C8NUM * plane; // elements per batch in dst
  int ic_remainder_ = channel % C8NUM;
  if (ic_remainder_ != 0) {
    // Channel count is not 8-aligned: copy pixel by pixel into the padded layout.
    int nhwc8_batch_offset = 0;
    for (int b = 0; b < batch; b++) {
      int batch_offset = b * channel * plane;
      for (int i = 0; i < plane; i++) {
        memcpy((int8_t *)dst + nhwc8_batch_offset + i * c8 * C8NUM,
               (const int8_t *)src + batch_offset + i * channel, channel);
      }
      nhwc8_batch_offset += nhwc8_batch_unit_offset;
    }
  } else {
    // Layouts coincide: one contiguous copy of the whole tensor.
    size_t ori_input_size = batch * plane * channel;
    memcpy((int8_t *)dst, (const int8_t *)src, ori_input_size);
  }
}
// Unpacks an NHWC8 int8 tensor (channels padded to a multiple of C8NUM) back
// to plain NHWC layout; the inverse of PackNHWCToNHWC8Int8. The per-channel
// padding tail in src is simply skipped. When channel is already a multiple
// of C8NUM the layouts are identical and one bulk memcpy suffices.
// Fix vs. original: casts of `src` no longer discard const.
void PackNHWC8ToNHWCInt8(const void *src, void *dst, int batch, int plane, int channel) {
  int c8 = UP_DIV(channel, C8NUM);                  // number of 8-channel blocks per pixel
  int nhwc8_batch_unit_offset = c8 * C8NUM * plane; // elements per batch in src
  int ic_remainder_ = channel % C8NUM;
  if (ic_remainder_ != 0) {
    // Channel count is not 8-aligned: copy pixel by pixel, dropping the padding.
    for (int b = 0; b < batch; b++) {
      int batch_offset = b * channel * plane;
      int nhwc8_batch_offset = b * nhwc8_batch_unit_offset;
      for (int i = 0; i < plane; i++) {
        memcpy((int8_t *)dst + batch_offset + i * channel,
               (const int8_t *)src + nhwc8_batch_offset + i * c8 * C8NUM, channel);
      }
    }
  } else {
    // Layouts coincide: one contiguous copy of the whole tensor.
    size_t ori_input_size = batch * plane * channel;
    memcpy((int8_t *)dst, (const int8_t *)src, ori_input_size);
  }
}
void PackNCHWToNHWC4Int8(const void *src, void *dst, int batch, int plane, int channel) {
int nhwc4_batch_offset = 0;
int c4 = UP_DIV(channel, C4NUM);
@ -1174,6 +1213,25 @@ void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter
// Repacks depthwise int8 weights into a C8-channel-blocked int16 layout while
// subtracting the filter zero point from every value:
//   dst[(c / C8NUM) * plane * C8NUM + k * C8NUM + (c % C8NUM)]
//     = (int16_t)(src[c * plane + k] - zp(c))
// If the quant arg has the FILTER_PER_CHANNEL flag set, each channel c uses
// its own zero point filter_quant_args_[c].zp_; otherwise all channels share
// filter_quant_args_[0].zp_.
void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
                             ConvQuantArg *quant_qrg) {
  const int per_channel = quant_qrg->per_channel_ & FILTER_PER_CHANNEL;
  for (int ch = 0; ch < channel; ch++) {
    // Pick the zero point for this channel (index 0 in the shared case).
    const int zp = quant_qrg->filter_quant_args_[per_channel ? ch : 0].zp_;
    const int8_t *src_row = origin_weight + ch * plane;
    // Base of this channel's slot inside its 8-channel block.
    int16_t *dst_slot = packed_weight_ + (ch / C8NUM) * plane * C8NUM + (ch % C8NUM);
    for (int k = 0; k < plane; k++) {
      dst_slot[k * C8NUM] = (int16_t)(src_row[k] - zp);
    }
  }
}
void PackDeconvDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
ConvQuantArg *quant_qrg) {
int weight_zp = quant_qrg->filter_quant_args_[0].zp_;
for (int c = 0; c < channel; c++) {
if (quant_qrg->per_channel_ & FILTER_PER_CHANNEL) {
weight_zp = quant_qrg->filter_quant_args_[c].zp_;

@ -96,6 +96,10 @@ void PackNHWCToNHWC4Int8(const void *src, void *dst, int batch, int plane, int c
void PackNHWC4ToNHWCInt8(const void *src, void *dst, int batch, int plane, int channel);
void PackNHWCToNHWC8Int8(const void *src, void *dst, int batch, int plane, int channel);
void PackNHWC8ToNHWCInt8(const void *src, void *dst, int batch, int plane, int channel);
void PackNCHWToNHWC4Int8(const void *src, void *dst, int batch, int plane, int channel);
void PackNC4HW4ToNHWC4Int8(const void *src, void *dst, int batch, int plane, int channel);
@ -114,6 +118,9 @@ void PackDepthwiseInt8Input(const int8_t *src, int16_t *dst, const ConvParameter
void PackDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
ConvQuantArg *quant_qrg);
void PackDeconvDepthwiseInt8Weight(const int8_t *origin_weight, int16_t *packed_weight_, int plane, int channel,
ConvQuantArg *quant_qrg);
#ifdef __cplusplus
}
#endif

@ -177,8 +177,17 @@ kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector<lite::Tensor *>
const mindspore::lite::PrimitiveC *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_DepthwiseConv2D);
auto kernel =
new (std::nothrow) kernel::ConvolutionDepthwiseInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
kernel::LiteKernel *kernel;
auto act_quant_size =
MSMAX(inputs[kInputIndex]->GetQuantParams().size(), outputs[kOutputIndex]->GetQuantParams().size());
if (act_quant_size == 1) { // per tensor
kernel = new (std::nothrow) kernel::ConvolutionDepthwiseInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
} else { // per channel
kernel =
new (std::nothrow) kernel::ConvolutionDepthwiseSWInt8CPUKernel(opParameter, inputs, outputs, ctx, primitive);
}
if (kernel == nullptr) {
MS_LOG(ERROR) << "kernel is nullptr.";
return nullptr;

@ -40,11 +40,21 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel {
int Execute(int task_id);
private:
int ReinitQuantParam();
int ReinitFreeBefore();
void FreeTmpQuant();
SlidingWindowParam *sliding = nullptr;
int16_t *packed_weight_ = nullptr;
int16_t *packed_input_ = nullptr;
int8_t *packed_input_ = nullptr;
int8_t *packed_output_ = nullptr;
bool need_align_ = false;
int8_t *input_zp_ = nullptr;
float *input_scale_ = nullptr;
float *weight_scale_ = nullptr;
int32_t *output_zp_ = nullptr;
float *output_scale_ = nullptr;
};
} // namespace mindspore::kernel

@ -52,8 +52,8 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
MS_LOG(ERROR) << "Malloc buffer failed.";
return RET_ERROR;
}
PackDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
weight_tensor->Batch(), &(conv_param_->conv_quant_arg_));
PackDeconvDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
weight_tensor->Batch(), &(conv_param_->conv_quant_arg_));
bias_data_ = reinterpret_cast<int32_t *>(malloc(C4NUM * OC4 * sizeof(int32_t)));
if (bias_data_ == nullptr) {

Loading…
Cancel
Save