!12277 add conv and conv3x3 coder
From: @zhujingxuan Reviewed-by: @wangchengyuan,@HilbertDavid Signed-off-by: @wangchengyuanpull/12277/MERGE
commit
f2650ecfc5
@ -0,0 +1,161 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "micro/coder/opcoders/nnacl/int8/conv2d_3x3_int8_coder.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "securec/include/securec.h"
|
||||
#include "nnacl/int8/conv3x3_int8.h"
|
||||
#include "src/runtime/kernel/arm/base/convolution_base.h"
|
||||
#include "src/runtime/kernel/arm/int8/convolution_3x3_int8.h"
|
||||
#include "micro/coder/opcoders/file_collector.h"
|
||||
#include "micro/coder/log.h"
|
||||
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
|
||||
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) {
|
||||
int input_channel = conv_param->input_channel_;
|
||||
int output_channel = conv_param->output_channel_;
|
||||
int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
|
||||
int iC8 = UP_DIV(input_channel, C8NUM);
|
||||
|
||||
size_t tmp_size = output_channel * iC8 * C8NUM * kernel_plane * sizeof(int16_t);
|
||||
auto tmp_addr = reinterpret_cast<int16_t *>(malloc(tmp_size));
|
||||
MS_CHECK_PTR_IF_NULL(tmp_addr);
|
||||
int ret = memset_s(tmp_addr, tmp_size, 0, tmp_size);
|
||||
if (ret != EOK) {
|
||||
free(tmp_addr);
|
||||
MS_LOG(ERROR) << "memset_s tmp_addr failed.";
|
||||
return;
|
||||
}
|
||||
PackWeightToC8Int8(origin_weight, tmp_addr, conv_param);
|
||||
Conv3x3Int8FilterTransform(tmp_addr, dst_weight, iC8, output_channel, kernel_plane);
|
||||
free(tmp_addr);
|
||||
}
|
||||
|
||||
int Conv2D3x3Int8Coder::InitWeightBias() {
|
||||
int input_channel = conv_param_->input_channel_;
|
||||
int output_channel = conv_param_->output_channel_;
|
||||
MS_CHECK_TRUE(input_channel > 0, "invalid input_channel");
|
||||
MS_CHECK_TRUE(output_channel > 0, "invalid output_channel");
|
||||
int iC8 = UP_DIV(input_channel, C8NUM);
|
||||
int oC4 = UP_DIV(output_channel, C4NUM);
|
||||
// init weight
|
||||
int transformed_size = iC8 * C8NUM * oC4 * C4NUM * 16 * sizeof(int16_t);
|
||||
transformed_filter_addr_ =
|
||||
static_cast<int16_t *>(allocator_->Malloc(kNumberTypeInt16, transformed_size, kOfflinePackWeight));
|
||||
MS_CHECK_PTR(transformed_filter_addr_);
|
||||
MS_CHECK_RET_CODE(memset_s(transformed_filter_addr_, transformed_size, 0, transformed_size),
|
||||
"memset_s transformed_filter_addr_ failed.");
|
||||
auto *original_weight_addr = reinterpret_cast<int8_t *>(filter_tensor_->data_c());
|
||||
ProcessFilterUint8(original_weight_addr, transformed_filter_addr_, conv_param_);
|
||||
|
||||
// init bias
|
||||
int new_bias_size = oC4 * C4NUM * sizeof(int32_t);
|
||||
new_bias_addr_ = static_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, new_bias_size, kOfflinePackWeight));
|
||||
MS_CHECK_PTR(new_bias_addr_);
|
||||
MS_CHECK_RET_CODE(memset_s(new_bias_addr_, new_bias_size, 0, new_bias_size), "memset_s new_bias_addr_ failed.");
|
||||
if (input_tensors_.size() == kInputSize2) {
|
||||
auto *ori_bias_addr = reinterpret_cast<int32_t *>(bias_tensor_->data_c());
|
||||
MS_CHECK_RET_CODE(
|
||||
memcpy_s(new_bias_addr_, output_channel * sizeof(int32_t), ori_bias_addr, output_channel * sizeof(int32_t)),
|
||||
"memset_s new_bias_addr_ failed.");
|
||||
} else {
|
||||
MS_ASSERT(input_tensors_.size() == kInputSize1);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Computes the byte sizes of the five run-time work buffers and requests each of them from the
// allocator as kWorkspace memory:
//   tile_buffer_        int16, thread_num_ * TILE_NUM * 16 * ic8 * C8NUM elements
//   block_unit_buffer_  int16, thread_num_ * 4 * 4 * C8NUM elements
//   tmp_dst_buffer_     int32, thread_num_ * TILE_NUM * 16 * oc4 * C4NUM elements
//   tmp_out_            uint8, oc4 * C4NUM * output_batch * output_w * output_h elements
//   c8_input_           int16, in_batch * input_h * input_w * ic8 * C8NUM elements
// The sizes are stored in the matching *_size_ members for use when code is generated.
//
// `context` is not used by this function.
// Returns RET_OK on success; a failed allocation is reported through MS_CHECK_PTR.
int Conv2D3x3Int8Coder::InitTmpBuffer(CoderContext *const context) {
  int ic8 = UP_DIV(conv_param_->input_channel_, C8NUM);
  int oc4 = UP_DIV(conv_param_->output_channel_, C4NUM);
  int in_batch = conv_param_->input_batch_;
  int input_w = conv_param_->input_w_;
  int input_h = conv_param_->input_h_;
  int output_batch = conv_param_->output_batch_;
  int output_w = conv_param_->output_w_;
  int output_h = conv_param_->output_h_;

  /*=============================tile_buffer_============================*/
  tile_buffer_size_ = thread_num_ * TILE_NUM * 16 * ic8 * C8NUM * sizeof(int16_t);
  tile_buffer_ = static_cast<int16_t *>(allocator_->Malloc(kNumberTypeInt16, tile_buffer_size_, kWorkspace));
  MS_CHECK_PTR(tile_buffer_);

  /*=============================block_unit_buffer_============================*/
  block_unit_buffer_size_ = thread_num_ * 4 * 4 * C8NUM * sizeof(int16_t);
  block_unit_buffer_ =
    static_cast<int16_t *>(allocator_->Malloc(kNumberTypeInt16, block_unit_buffer_size_, kWorkspace));
  MS_CHECK_PTR(block_unit_buffer_);

  /*=============================tmp_dst_buffer_============================*/
  tmp_dst_buffer_size_ = thread_num_ * TILE_NUM * 16 * oc4 * C4NUM * sizeof(int32_t);
  tmp_dst_buffer_ = static_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, tmp_dst_buffer_size_, kWorkspace));
  MS_CHECK_PTR(tmp_dst_buffer_);

  /*=============================tmp_out_============================*/
  tmp_out_size_ = oc4 * C4NUM * output_batch * output_w * output_h * sizeof(uint8_t);
  tmp_out_ = static_cast<uint8_t *>(allocator_->Malloc(kNumberTypeUInt8, tmp_out_size_, kWorkspace));
  MS_CHECK_PTR(tmp_out_);

  /*=============================input_data_============================*/
  c8_input_size_ = in_batch * input_h * input_w * ic8 * C8NUM * sizeof(int16_t);
  c8_input_ = static_cast<int16_t *>(allocator_->Malloc(kNumberTypeInt16, c8_input_size_, kWorkspace));
  MS_CHECK_PTR(c8_input_);
  return RET_OK;
}
|
||||
|
||||
// Marks the output tensor as NHWC.
void Conv2D3x3Int8Coder::ConfigInputOutput() {
  output_tensor_->set_format(schema::Format_NHWC);
}
|
||||
|
||||
// Prepares the coder before code generation.
//
// Order of work: copy the thread count into the conv parameter, run the base-class Init, set the
// quantization parameters, build the packed weight/bias, reserve the work buffers and finally
// configure the output tensor format. Any failing step aborts through MS_CHECK_RET_CODE.
// Returns RET_OK when every step succeeds.
int Conv2D3x3Int8Coder::Prepare(CoderContext *const context) {
  // The conv parameter carries the thread count in two places; keep both in sync with the coder.
  conv_param_->op_parameter_.thread_num_ = thread_num_;
  conv_param_->thread_num_ = thread_num_;

  MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "ConvolutionBase init failed.");
  MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed.");
  MS_CHECK_RET_CODE(InitWeightBias(), "Init weight bias failed.");

  // Work buffers for the packed input and the intermediate/packed output.
  MS_CHECK_RET_CODE(InitTmpBuffer(context), "Init tmp buffer failed.");

  // Output tensor format.
  ConfigInputOutput();
  return RET_OK;
}
|
||||
|
||||
// Generates the inference code for this operator and appends it to `context`.
//
// Emitted sequence:
//   1. memset of every work buffer to zero,
//   2. definition of the `conv_param_` struct,
//   3. PackInputToC8Int8 from the input tensor into c8_input_,
//   4. the convolution itself: a direct Conv3x3Int8 call with task id 0 when at most one thread is
//      used, otherwise a Conv3x3Int8Args struct plus a ParallelLaunch of Conv3x3Int8Run,
//   5. PackNC4HW4ToNHWCInt8 from tmp_out_ into the output tensor.
//
// NOTE(review): only conv_int8.h / conv_int8.c are collected although the emitted calls are the
// 3x3 kernels - confirm these files actually provide Conv3x3Int8.
// Returns RET_OK.
int Conv2D3x3Int8Coder::DoCode(CoderContext *const context) {
  Collect(context, {"nnacl/int8/conv_int8.h"}, {"pack.c", "conv_int8.c", "fixed_point.c"});

  nnacl::NNaclInt8Serializer serializer;
  serializer.precision(kPrecision);

  // Clear all work buffers before use.
  serializer.CodeFunction("memset", tile_buffer_, 0, tile_buffer_size_);
  serializer.CodeFunction("memset", block_unit_buffer_, 0, block_unit_buffer_size_);
  serializer.CodeFunction("memset", tmp_dst_buffer_, 0, tmp_dst_buffer_size_);
  serializer.CodeFunction("memset", tmp_out_, 0, tmp_out_size_);
  serializer.CodeFunction("memset", c8_input_, 0, c8_input_size_);

  // Convolution parameters as a struct in the generated code.
  serializer.CodeStruct("conv_param_", *conv_param_);

  // Input packing to the C8 layout.
  serializer.CodeFunction("PackInputToC8Int8", input_tensor_, c8_input_, "&conv_param_");

  // Operator call: single task or parallel launch.
  if (thread_num_ <= 1) {
    int single_task_id = 0;
    serializer.CodeFunction("Conv3x3Int8", c8_input_, transformed_filter_addr_, new_bias_addr_, output_tensor_,
                            tile_buffer_, block_unit_buffer_, tmp_dst_buffer_, tmp_out_, single_task_id,
                            "&conv_param_");
  } else {
    serializer.CodeBaseStruct("Conv3x3Int8Args", "args", c8_input_, transformed_filter_addr_, new_bias_addr_,
                              output_tensor_, tile_buffer_, block_unit_buffer_, tmp_dst_buffer_, tmp_out_,
                              "&conv_param_");
    serializer.CodeFunction("ParallelLaunch", "THREAD_POOL_DEFAULT", "Conv3x3Int8Run", "&args", "thread_num");
  }

  // Layout conversion of the result into the output tensor.
  serializer.CodeFunction("PackNC4HW4ToNHWCInt8", tmp_out_, output_tensor_, conv_param_->output_batch_,
                          conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);

  context->AppendCode(serializer.str());
  return RET_OK;
}
|
||||
} // namespace mindspore::lite::micro::nnacl
|
@ -0,0 +1,61 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_Conv2D_3X3_INT8_CODER_H_
|
||||
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_Conv2D_3X3_INT8_CODER_H_
|
||||
#include "micro/coder/opcoders/base/conv2d_base_coder.h"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "nnacl/conv_parameter.h"
|
||||
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
class Conv2D3x3Int8Coder final : public Conv2DBaseCoder {
|
||||
public:
|
||||
Conv2D3x3Int8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index, Target target)
|
||||
: Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
|
||||
|
||||
int Prepare(CoderContext *const context) override;
|
||||
|
||||
int DoCode(CoderContext *const context) override;
|
||||
|
||||
~Conv2D3x3Int8Coder() override = default;
|
||||
|
||||
private:
|
||||
int InitWeightBias();
|
||||
|
||||
void ConfigInputOutput();
|
||||
|
||||
int InitTmpBuffer(CoderContext *ctx);
|
||||
|
||||
int16_t *transformed_filter_addr_{nullptr};
|
||||
int32_t *new_bias_addr_{nullptr};
|
||||
|
||||
int16_t *block_unit_buffer_{nullptr};
|
||||
int16_t *tile_buffer_{nullptr};
|
||||
int32_t *tmp_dst_buffer_{nullptr};
|
||||
uint8_t *tmp_out_{nullptr};
|
||||
int16_t *c8_input_{nullptr};
|
||||
|
||||
size_t tile_buffer_size_{0};
|
||||
size_t block_unit_buffer_size_{0};
|
||||
size_t tmp_dst_buffer_size_{0};
|
||||
size_t tmp_out_size_{0};
|
||||
size_t c8_input_size_{0};
|
||||
};
|
||||
} // namespace mindspore::lite::micro::nnacl
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_Conv2D_3X3_INT8_CODER_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,71 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_INT8_CODER_H_
|
||||
#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_INT8_CODER_H_
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "micro/coder/opcoders/base/conv2d_base_coder.h"
|
||||
#include "nnacl/conv_parameter.h"
|
||||
#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
|
||||
|
||||
namespace mindspore::lite::micro::nnacl {
|
||||
class Conv2DINT8Coder final : public Conv2DBaseCoder {
|
||||
public:
|
||||
explicit Conv2DINT8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
|
||||
const Model::Node *node, size_t node_index, Target target)
|
||||
: Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
|
||||
|
||||
int Prepare(CoderContext *const context) override;
|
||||
|
||||
int DoCode(CoderContext *const context) override;
|
||||
|
||||
~Conv2DINT8Coder() override = default;
|
||||
|
||||
private:
|
||||
int InitWeightBias(CoderContext *ctx);
|
||||
|
||||
void CheckSupportOptimize();
|
||||
|
||||
int InitTmpBuffer(CoderContext *ctx);
|
||||
|
||||
int Resize();
|
||||
|
||||
int8_t *packed_weight_{nullptr};
|
||||
int32_t *bias_data_{nullptr};
|
||||
int32_t *filter_zp_ptr_{nullptr};
|
||||
|
||||
int thread_count_{1};
|
||||
int tile_num_{0};
|
||||
|
||||
bool support_optimize_{true};
|
||||
bool filter_peroc_{false};
|
||||
|
||||
size_t packed_input_size_{0};
|
||||
size_t input_sum_size_{0};
|
||||
size_t matmul_packed_input_size_{0};
|
||||
|
||||
int8_t *packed_input_{nullptr};
|
||||
int32_t *input_sum_{nullptr};
|
||||
int8_t *matmul_packed_input_{nullptr};
|
||||
|
||||
string matmul_func_;
|
||||
|
||||
std::function<int(nnacl::NNaclInt8Serializer &, const std::string &, const std::string &)> pack_weight_init_{nullptr};
|
||||
};
|
||||
} // namespace mindspore::lite::micro::nnacl
|
||||
#endif // MINDSPORE_LITE_MICRO_CODER_OPCODERS_INT8_CODER_H_
|
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "wrapper/int8/conv_init_int8.h"
|
||||
#include <memory.h>
|
||||
#include "nnacl/op_base.h"
|
||||
#include "nnacl/int8/matmul_int8.h"
|
||||
#include "nnacl/errorcode.h"
|
||||
|
||||
int ConvInit(int8_t *origin_weight, const int32_t *ori_bias, const int32_t *filter_quant_zps, int kernel_h,
|
||||
int kernel_w, int input_channel, int output_channel, int32_t input_zp, bool filter_peroc,
|
||||
bool support_optimize, int8_t **packed_weight, int32_t **bias_data) {
|
||||
int8_t *packed_weight_ = NULL;
|
||||
int32_t *bias_data_ = NULL;
|
||||
int kernel_plane = kernel_h * kernel_w;
|
||||
int up_round_deep;
|
||||
int up_round_oc;
|
||||
#ifdef ENABLE_ARM32
|
||||
up_round_oc = UP_ROUND(output_channel, C2NUM);
|
||||
up_round_deep = UP_ROUND(kernel_plane * input_channel, C16NUM);
|
||||
#else
|
||||
if (support_optimize) {
|
||||
up_round_oc = UP_ROUND(output_channel, C8NUM);
|
||||
up_round_deep = UP_ROUND(kernel_plane * input_channel, C4NUM);
|
||||
} else {
|
||||
up_round_oc = UP_ROUND(output_channel, C4NUM);
|
||||
up_round_deep = UP_ROUND(kernel_plane * input_channel, C16NUM);
|
||||
}
|
||||
#endif
|
||||
int pack_weight_size = up_round_oc * up_round_deep;
|
||||
size_t bias_size = up_round_oc * sizeof(int32_t);
|
||||
|
||||
// init weight
|
||||
packed_weight_ = (int8_t *)(malloc(pack_weight_size));
|
||||
if (packed_weight_ == NULL) {
|
||||
return NNACL_ERR;
|
||||
}
|
||||
memset(packed_weight_, 0, pack_weight_size);
|
||||
#ifdef ENABLE_ARM32
|
||||
RowMajor2Row2x16MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_plane);
|
||||
#else
|
||||
if (support_optimize) {
|
||||
RowMajor2Row8x4MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_plane);
|
||||
} else {
|
||||
RowMajor2Row16x4MajorInt8(origin_weight, packed_weight_, output_channel, input_channel * kernel_plane);
|
||||
}
|
||||
#endif
|
||||
|
||||
// init bias
|
||||
bias_data_ = (int32_t *)(malloc(bias_size));
|
||||
if (bias_data_ == NULL) {
|
||||
free(packed_weight_);
|
||||
return NNACL_ERR;
|
||||
}
|
||||
memset(bias_data_, 0, bias_size);
|
||||
if (ori_bias != NULL) {
|
||||
memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t));
|
||||
}
|
||||
|
||||
for (int oc = 0; oc < output_channel; oc++) {
|
||||
int32_t filter_zp = filter_quant_zps[0];
|
||||
if (filter_peroc) {
|
||||
filter_zp = filter_quant_zps[oc];
|
||||
}
|
||||
int32_t weight_sum_value = up_round_deep * filter_zp;
|
||||
for (int i = 0; i < kernel_plane * input_channel; i++) {
|
||||
weight_sum_value += origin_weight[oc * kernel_plane * input_channel + i] - filter_zp;
|
||||
}
|
||||
bias_data_[oc] += filter_zp * input_zp * up_round_deep - weight_sum_value * input_zp;
|
||||
}
|
||||
|
||||
*packed_weight = packed_weight_;
|
||||
*bias_data = bias_data_;
|
||||
return NNACL_OK;
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_MICRO_INT8_CONV_INIT_H_
|
||||
#define MINDSPORE_LITE_MICRO_INT8_CONV_INIT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Packs an int8 convolution weight and builds the matching int32 bias with the zero points
 * folded in.
 *
 * origin_weight     row-major weight: output_channel rows of kernel_h * kernel_w * input_channel values
 * ori_bias          optional bias with output_channel values; may be NULL
 * filter_quant_zps  filter zero points; one per output channel when filter_peroc is true,
 *                   otherwise only the first element is read
 * input_zp          input zero point
 * support_optimize  selects between the two non-ARM32 packing layouts
 * packed_weight     out: receives a malloc'ed packed weight buffer
 * bias_data         out: receives a malloc'ed bias buffer
 *
 * The buffers returned through packed_weight and bias_data are not freed by ConvInit.
 * Returns NNACL_OK on success and NNACL_ERR on failure.
 */
int ConvInit(int8_t *origin_weight, const int32_t *ori_bias, const int32_t *filter_quant_zps, int kernel_h,
             int kernel_w, int input_channel, int output_channel, int32_t input_zp, bool filter_peroc,
             bool support_optimize, int8_t **packed_weight, int32_t **bias_data);

#ifdef __cplusplus
}
#endif
|
||||
|
||||
#endif // MINDSPORE_LITE_MICRO_INT8_CONV_INIT_H_
|
Loading…
Reference in new issue