From d6abf2b531b3ea5688b3d61df8a450837cc8312e Mon Sep 17 00:00:00 2001
From: yangjie159
Date: Fri, 19 Feb 2021 15:34:43 +0800
Subject: [PATCH] fix bugs of micro, and add deconvolution int8 coder

---
 mindspore/lite/micro/CMakeLists.txt                |   2 +-
 mindspore/lite/micro/cmake/file_list.cmake         |   3 +
 .../component/benchmark_component.cc               |   2 +-
 .../generator/component/train_component.cc         |   2 +-
 .../coder/generator/train/train_generator.cc       |  14 +-
 .../nnacl/int8/deconvolution_int8_coder.cc         | 161 ++++++++++++++++++
 .../nnacl/int8/deconvolution_int8_coder.h          |  63 +++++++
 mindspore/lite/micro/coder/session.cc              |   3 +-
 8 files changed, 233 insertions(+), 17 deletions(-)
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.cc
 create mode 100644 mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.h

diff --git a/mindspore/lite/micro/CMakeLists.txt b/mindspore/lite/micro/CMakeLists.txt
index b933d78f82..d49c00d88f 100644
--- a/mindspore/lite/micro/CMakeLists.txt
+++ b/mindspore/lite/micro/CMakeLists.txt
@@ -35,7 +35,7 @@ ms_build_flatbuffers_lite(FBS_FILES
 if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
     MESSAGE("******Micro Debug********")
     set(CMAKE_C_FLAGS "-Wall -Werror -ftrapv -DDebug -g -fvisibility=default ${CMAKE_C_FLAGS}")
-    set(CMAKE_CXX_FLAGS "-Wall -Werror-ftrapv -DDebug -g -fvisibility=default ${CMAKE_CXX_FLAGS}")
+    set(CMAKE_CXX_FLAGS "-Wall -Werror -ftrapv -DDebug -g -fvisibility=default ${CMAKE_CXX_FLAGS}")
 else()
     MESSAGE(" ******Micro Release********")
     set(CMAKE_C_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror \
diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake
index e0842dd1d1..80b6f3d413 100644
--- a/mindspore/lite/micro/cmake/file_list.cmake
+++ b/mindspore/lite/micro/cmake/file_list.cmake
@@ -84,6 +84,7 @@ set(CODER_OPCODERS_SRC
         ${MICRO_DIR}/coder/opcoders/nnacl/int8/conv2d_1x1_int8_coder.cc
         ${MICRO_DIR}/coder/opcoders/nnacl/int8/conv2d_3x3_int8_coder.cc
         ${MICRO_DIR}/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc
+        ${MICRO_DIR}/coder/opcoders/nnacl/int8/deconvolution_int8_coder.cc
         ${MICRO_DIR}/coder/opcoders/nnacl/int8/pooling_int8_coder.cc
         ${MICRO_DIR}/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
         ${MICRO_DIR}/coder/opcoders/nnacl/int8/reshape_int8_coder.cc
@@ -129,6 +130,8 @@ set(LITE_KERNEL_SRC
         ${LITE_DIR}/nnacl/int8/conv3x3_int8.c
         ${LITE_DIR}/nnacl/int8/conv1x1_int8.c
         ${LITE_DIR}/nnacl/base/conv1x1_base.c
+        ${LITE_DIR}/nnacl/int8/deconv_int8.c
+        ${LITE_DIR}/nnacl/int8/common_func_int8.c
 )

 list(APPEND FILE_SET ${CODER_SRC} ${CODER_UTILS_SRC} ${CODER_OPCODERS_SRC} ${CODER_GENERATOR_SRC}
diff --git a/mindspore/lite/micro/coder/generator/component/benchmark_component.cc b/mindspore/lite/micro/coder/generator/component/benchmark_component.cc
index 3b42e2a2bf..71ab14bfdd 100644
--- a/mindspore/lite/micro/coder/generator/component/benchmark_component.cc
+++ b/mindspore/lite/micro/coder/generator/component/benchmark_component.cc
@@ -184,7 +184,7 @@ void CodeBenchmarkFreeResourse(std::ofstream &ofs, const std::string &module_nam
   ofs << "  for (int i = 0; i < " << inputs_num << "; ++i) {\n";
   ofs << "    free(inputs_binbuf[i]);\n"
          "  }\n"
-        "  return RET_OK;"
+        "  return RET_OK;\n"
          "}\n\n";
 }
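Two small but real bugs are fixed above. In CMakeLists.txt, a missing space had fused two flags into "-Werror-ftrapv", which compilers do not recognize, instead of enabling -Werror and -ftrapv separately. In benchmark_component.cc, the emitted string "  return RET_OK;" lacked a trailing \n, so the generated cleanup function ended with "return RET_OK;}" jammed onto one line. With the fix, the generated benchmark cleanup reads as intended; a sketch only, where the function name and the input count of 2 are illustrative placeholders, not values taken from this patch:

  int net_FreeResource() {
    for (int i = 0; i < 2; ++i) {
      free(inputs_binbuf[i]);
    }
    return RET_OK;
  }
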
diff --git a/mindspore/lite/micro/coder/generator/component/train_component.cc b/mindspore/lite/micro/coder/generator/component/train_component.cc
index 66b5512f23..866b284418 100644
--- a/mindspore/lite/micro/coder/generator/component/train_component.cc
+++ b/mindspore/lite/micro/coder/generator/component/train_component.cc
@@ -133,7 +133,7 @@ void CodeTrainImplement(std::ofstream &ofs, const std::string &module_name, cons
     result += "}";
     return result;
   };
-  auto wrap = [](int i) { return "[" + std::to_string(i) + "]"; };
+  auto wrap = [](size_t i) { return "[" + std::to_string(i) + "]"; };
   auto offset_inputs = [&]() {
     std::string src = "origin_inputs";
     std::string dst = "input_ptr";
diff --git a/mindspore/lite/micro/coder/generator/train/train_generator.cc b/mindspore/lite/micro/coder/generator/train/train_generator.cc
index b8aa287098..bfb7d87d0b 100644
--- a/mindspore/lite/micro/coder/generator/train/train_generator.cc
+++ b/mindspore/lite/micro/coder/generator/train/train_generator.cc
@@ -27,7 +27,7 @@ void TrainGenerator::CodeGradientFunc(std::ofstream &ofs) const {
   ofs << "float " << config_->module_name() << "_ComputeLossAndGradient() {\n";
   ofs << "  float loss = 0;\n";
   for (const auto &block : ctx_->train_blocks()) {
-    ofs << "  {\n" << block << "  }\n";
+    ofs << "\t{\n" << block << "\t}\n";
   }
   ofs << "  return loss;\n";
   ofs << "}\n";
@@ -45,9 +45,6 @@ int TrainGenerator::CodeNetHFile() {
   ofs << "#include \"microtensor.h\"\n\n";
   CodeTrainParams(ofs);
   CodeInputAndOutputState(ofs, config_->module_name());
-  if (is_get_quant_args_) {
-    CodeGraphQuantArgsState(ofs, config_->module_name());
-  }
   if (config_->is_weight_file()) {
     CodeInitWeightState(ofs, config_->module_name());
   }
@@ -68,9 +65,6 @@ int TrainGenerator::CodeNetCFile() {
   CodeInitResourceImplement(ofs, config_->module_name(), ctx_);
   CodeFreeResourceImplement(ofs, config_->module_name(), ctx_);
   CodeFeaturesImplement(ofs, config_->module_name(), ctx_);
-  if (is_get_quant_args_) {
-    CodeGraphQuantArgsImplement(ofs, config_->module_name(), ctx_);
-  }
   CodeNetRunFunc(ofs);
   CodeGradientFunc(ofs);
   CodeTrainImplement(ofs, config_->module_name(), ctx_);
@@ -85,22 +79,16 @@ int TrainGenerator::CodeBenchmarkFile() {
   MS_CHECK_TRUE(!ofs.bad(), "filed to open file");
   std::vector<Tensor *> inputs = ctx_->graph_inputs();
   size_t inputs_num = inputs.size();
-
   CodeBenchmarkHeader(ofs, net_inc_hfile_);
   CodeBenchmarkUsage(ofs);
   CodeBenchmarkWarmup(ofs, config_->module_name());
-
   CodeBenchmarkSetInputs(ofs, config_->module_name(), ctx_);
   CodeBenchmarkSetBuffer(ofs, config_->module_name());
   if (config_->is_weight_file()) {
     CodeBenchmarkInitWeight(ofs, config_->module_name());
   }
-  if (config_->code_mode() == CodeMode::Code_Inference) {
-    CodeBenchmarkConfigThread(ofs);
-  }
   CodeBenchmarkInference(ofs, config_->module_name());
   CodeBenchmarkPrintOutputs(ofs, config_->module_name());
-
   CodeBenchmarkFreeResourse(ofs, config_->module_name(), inputs_num);
   ofs.close();
   return RET_OK;
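CodeGradientFunc gives each training block its own brace scope, so locals emitted for different blocks cannot collide; the hunk above only switches that wrapper indentation from spaces to tabs. For a module named "net" with a single train block (both hypothetical stand-ins, not values from this patch), the generated function would look roughly like:

  float net_ComputeLossAndGradient() {
    float loss = 0;
    {
      /* code emitted for the training block; accumulates into loss */
    }
    return loss;
  }

The remaining hunks remove the quant-args state and implementation output, plus the inference-only thread-config call, from the train generator.
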
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.cc
new file mode 100644
index 0000000000..ea9242fcc8
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.cc
@@ -0,0 +1,161 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.h"
+#include
+#include "nnacl/int8/deconv_int8.h"
+#include "micro/coder/opcoders/file_collector.h"
+#include "micro/coder/opcoders/serializers/nnacl_serializer/nnacl_int8_serializer.h"
+
+using mindspore::schema::PrimitiveType_DeConv2D;
+
+namespace mindspore::lite::micro::nnacl {
+
+int DeconvolutionInt8Coder::Init(CoderContext *const context) {
+  CheckSupportOptimize();
+  MS_CHECK_RET_CODE(SetQuantParam(), "deconv int8 SetQuantParam error!");
+  MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2DBaseCoder Init error!");
+  MS_CHECK_RET_CODE(InitParam(), "deconv int8 InitParam error!");
+  MS_CHECK_RET_CODE(InitBiasWeight(context), "deconv int8 InitBiasWeight error!");
+  MS_CHECK_RET_CODE(InitData(context), "deconv int8 InitData error!");
+  return RET_OK;
+}
+
+int DeconvolutionInt8Coder::Prepare(CoderContext *const context) {
+  conv_param_->thread_num_ = thread_num_;
+  conv_param_->op_parameter_.thread_num_ = thread_num_;
+  thread_count_ = thread_num_;
+  MS_CHECK_RET_CODE(Init(context), "deconv int8 Init error!");
+  MS_CHECK_RET_CODE(InitRunBuf(context), "deconv int8 InitRunBuf error!");
+  return RET_OK;
+}
+
+void DeconvolutionInt8Coder::CheckSupportOptimize() {
+  support_optimize_ = false;
+  matmul_func_str_ = "NULL";
+}
+
+int DeconvolutionInt8Coder::InitParam() {
+  matmul_param_ = new (std::nothrow) MatMulParameter();
+  MS_CHECK_PTR(matmul_param_);
+  matmul_param_->row_ = conv_param_->input_h_ * conv_param_->input_w_;
+  matmul_param_->deep_ = conv_param_->input_channel_;
+  matmul_param_->col_ = conv_param_->output_channel_ * conv_param_->kernel_h_ * conv_param_->kernel_w_;
+
+  /* optimize normal -> same data layout */
+  int oc4 = UP_DIV(conv_param_->output_channel_, C4NUM);
+  thread_count_ = MSMIN(conv_param_->op_parameter_.thread_num_, oc4);
+  MS_CHECK_TRUE(thread_count_ > 0, "thread_count_ <= 0");
+  thread_stride_ = UP_DIV(oc4, thread_count_);
+  return RET_OK;
+}
+
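+// Layout note: bias is padded up to a multiple of C4NUM output channels, and
+// the transformed weight is padded to C4NUM output channels by C16NUM input
+// channels. The weight buffer is pre-filled with the filter zero-point so
+// that, once the zero-point offset is subtracted inside the int8 matmul, the
+// padded lanes contribute nothing to the accumulated sums.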
+int DeconvolutionInt8Coder::InitBiasWeight(CoderContext *const context) {
+  MS_CHECK_TRUE(conv_param_->output_channel_ > 0, "invalid output_channel");
+  int size = UP_ROUND(conv_param_->output_channel_, C4NUM) * sizeof(int32_t);
+  bias_data_ = reinterpret_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, size, kOfflinePackWeight));
+  MS_CHECK_PTR(bias_data_);
+  MS_CHECK_RET_CODE(memset_s(bias_data_, size, 0, size), "memset_s new_bias_addr_ failed.");
+  if (input_tensors_.size() == kInputSize2) {
+    auto *ori_bias_addr = reinterpret_cast<int32_t *>(bias_tensor_->data_c());
+    MS_CHECK_RET_CODE(memcpy_s(bias_data_, conv_param_->output_channel_ * sizeof(int32_t), ori_bias_addr,
+                               conv_param_->output_channel_ * sizeof(int32_t)),
+                      "memcpy_s new_bias_addr_ failed.");
+  }
+
+  size = UP_ROUND(conv_param_->output_channel_, C4NUM) * UP_ROUND(conv_param_->input_channel_, C16NUM) *
+         conv_param_->kernel_w_ * conv_param_->kernel_h_ * sizeof(int8_t);
+  weight_ptr_ = reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, size, kOfflinePackWeight));
+  MS_CHECK_PTR(weight_ptr_);
+  MS_CHECK_RET_CODE(
+    memset_s(weight_ptr_, size, static_cast<int8_t>(conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_), size),
+    "memset_s weight_ptr_ failed.");
+  DeConvWeightTransInt8(reinterpret_cast<int8_t *>(filter_tensor_->data_c()), weight_ptr_, conv_param_->input_channel_,
+                        conv_param_->output_channel_, conv_param_->kernel_h_ * conv_param_->kernel_w_,
+                        support_optimize_);
+
+  size = UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->kernel_h_ * conv_param_->kernel_w_;
+  weight_sum_ =
+    reinterpret_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, size * sizeof(int32_t), kOfflinePackWeight));
+  MS_CHECK_PTR(weight_sum_);
+  MS_CHECK_RET_CODE(memset_s(weight_sum_, size * sizeof(int32_t), 0, size * sizeof(int32_t)),
+                    "memset_s weight_sum_ failed.");
+  DeConvPackWeightSum(weight_ptr_, weight_sum_, conv_param_->conv_quant_arg_.input_quant_args_[0].zp_,
+                      conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, UP_ROUND(matmul_param_->deep_, C16NUM),
+                      size, support_optimize_);
+
+  return RET_OK;
+}
+
+int DeconvolutionInt8Coder::InitData(CoderContext *const context) {
+  input_ptr_size_ = UP_ROUND(conv_param_->input_h_ * conv_param_->input_w_, C4NUM) *
+                    UP_ROUND(conv_param_->input_channel_, C16NUM) * sizeof(int8_t);
+  input_ptr_ = reinterpret_cast<int8_t *>(allocator_->Malloc(kNumberTypeInt8, input_ptr_size_, kWorkspace));
+  return RET_OK;
+}
+
+int DeconvolutionInt8Coder::InitRunBuf(CoderContext *const context) {
+  tmp_buffer_size_ = UP_ROUND(conv_param_->input_h_ * conv_param_->input_w_, C4NUM) *
+                     UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->kernel_w_ *
+                     conv_param_->kernel_h_ * sizeof(int32_t);
+  tmp_buffer_ = reinterpret_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, tmp_buffer_size_, kWorkspace));
+
+  tmp_output_size_ =
+    UP_ROUND(conv_param_->output_channel_, C4NUM) * conv_param_->output_h_ * conv_param_->output_w_ * sizeof(int32_t);
+  tmp_output_ = reinterpret_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, tmp_output_size_, kWorkspace));
+
+  input_sum_size_ = UP_ROUND(matmul_param_->row_, C4NUM) * sizeof(int32_t);
+  input_sum_ = reinterpret_cast<int32_t *>(allocator_->Malloc(kNumberTypeInt32, input_sum_size_, kWorkspace));
+  return RET_OK;
+}
+
+int DeconvolutionInt8Coder::DoCode(CoderContext *const context) {
+  Collect(context, {"nnacl/int8/deconv_int8.h"}, {"int8/deconv_int8.c", "pack.c", "quantization/fixed_point.c"});
+
+  nnacl::NNaclInt8Serializer code;
+  code.CodeFunction("memset", input_ptr_, 0, input_ptr_size_);
+  code.CodeFunction("memset", tmp_buffer_, 0, tmp_buffer_size_);
+  code.CodeFunction("memset", tmp_output_, 0, tmp_output_size_);
+  code.CodeFunction("memset", input_sum_, 0, input_sum_size_);
+
+  // define conv params
+  code.CodeStruct("conv_param_", *conv_param_);
+
+  MS_CHECK_TRUE(conv_param_->input_batch_ == 1, "batch number should be 1.");
+
+  code.CodeFunction("RowMajor2Row16x4MajorInt8", input_tensor_, input_ptr_, matmul_param_->row_, matmul_param_->deep_);
+  code.CodeFunction("DeConvPackInputSum", input_ptr_, input_sum_,
+                    conv_param_->conv_quant_arg_.filter_quant_args_[0].zp_, UP_ROUND(matmul_param_->row_, C4NUM),
+                    UP_ROUND(matmul_param_->deep_, C16NUM), support_optimize_);
+
+  int kernel_plane = conv_param_->kernel_w_ * conv_param_->kernel_h_;
+  int cur_oc = MSMIN(thread_stride_, UP_DIV(conv_param_->output_channel_, C8NUM));
+  int cur_oc_res = MSMIN(thread_stride_ * C4NUM, conv_param_->output_channel_);
+
+  MS_CHECK_TRUE(cur_oc > 0, "cur_oc should be greater than 0.");
+
+  code.CodeFunction("DeConvInt8", input_ptr_, weight_ptr_, tmp_buffer_, weight_sum_, input_sum_,
+                    UP_ROUND(matmul_param_->row_, C4NUM), cur_oc * C4NUM * kernel_plane,
+                    UP_ROUND(matmul_param_->deep_, C16NUM), "&conv_param_", matmul_func_str_);
+
+  code.CodeFunction("DeConvPostInt8", tmp_buffer_, bias_data_, tmp_output_, output_tensor_, cur_oc_res, "&conv_param_",
+                    support_optimize_);
+  context->AppendCode(code.str());
+  return RET_OK;
+}
+
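+// Registration note: DoCode above emits the nnacl int8 deconv pipeline into
+// the generated source, in order: pack the input to 16x4 row-major
+// (RowMajor2Row16x4MajorInt8), accumulate per-row input sums
+// (DeConvPackInputSum), run the tiled int8 matmul into an int32 scratch
+// buffer (DeConvInt8), then fold in bias and requantize to the int8 output
+// (DeConvPostInt8).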
+REG_OPERATOR_CODER(kAllTargets, kNumberTypeInt8, PrimitiveType_DeConv2D, CPUOpCoderCreator<DeconvolutionInt8Coder>)
+}  // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.h
new file mode 100644
index 0000000000..7b5839f3bf
--- /dev/null
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/deconvolution_int8_coder.h
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_MICRO_CODER_OPCODERS_DECONVOLUTION_INT8_CODER_H_
+#define MINDSPORE_LITE_MICRO_CODER_OPCODERS_DECONVOLUTION_INT8_CODER_H_
+
+#include <string>
+#include <vector>
+#include "coder/opcoders/base/conv2d_base_coder.h"
+#include "nnacl/matmul_parameter.h"
+
+namespace mindspore::lite::micro::nnacl {
+class DeconvolutionInt8Coder final : public Conv2DBaseCoder {
+ public:
+  DeconvolutionInt8Coder(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
+                         const Model::Node *node, size_t node_index, Target target)
+      : Conv2DBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
+  ~DeconvolutionInt8Coder() override { delete matmul_param_; }
+
+  int DoCode(CoderContext *const context) override;
+  int Prepare(CoderContext *const context) override;
+
+ private:
+  int Init(CoderContext *const context);
+  int InitData(CoderContext *ctx);
+  int InitParam();
+  int InitBiasWeight(CoderContext *ctx);
+  void CheckSupportOptimize();
+  int InitRunBuf(CoderContext *ctx);
+
+  int32_t *tmp_buffer_{nullptr};
+  int tmp_buffer_size_{0};
+  int32_t *tmp_output_{nullptr};
+  int tmp_output_size_{0};
+  int32_t *input_sum_{nullptr};
+  int input_sum_size_{0};
+
+  int8_t *input_ptr_{nullptr};
+  int input_ptr_size_{0};
+  int8_t *weight_ptr_{nullptr};
+  int32_t *weight_sum_{nullptr};
+  size_t thread_count_{1};
+  int thread_stride_{0};
+  int32_t *bias_data_{nullptr};
+  std::string matmul_func_str_;
+  MatMulParameter *matmul_param_{nullptr};
+  bool support_optimize_{true};
+};
+}  // namespace mindspore::lite::micro::nnacl
+#endif  // MINDSPORE_LITE_MICRO_CODER_OPCODERS_DECONVOLUTION_INT8_CODER_H_
diff --git a/mindspore/lite/micro/coder/session.cc b/mindspore/lite/micro/coder/session.cc
index 777502a83e..8e1c349f3b 100644
--- a/mindspore/lite/micro/coder/session.cc
+++ b/mindspore/lite/micro/coder/session.cc
@@ -132,7 +132,7 @@ int CoderSession::GenerateCode() {
       generator = std::make_shared<InferenceGenerator>(std::move(context_));
       break;
     case Code_Train:
-      MS_LOG(INFO) << "generate code for Inference";
+      MS_LOG(INFO) << "generate code for Train";
       generator = std::make_shared<TrainGenerator>(std::move(context_));
       break;
     default:
@@ -141,6 +141,7 @@
   }
   // when use file, coder context need to remove initial parameters from tensors info
   // we use tmp_tensor_list to storage
+  MS_CHECK_PTR(generator);
   int ret = generator->GenerateCode();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "generate code failed";
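
The MS_CHECK_PTR added in session.cc closes a real gap: when code_mode() matches neither case, the switch leaves generator empty, and the following generator->GenerateCode() would dereference a null shared_ptr. A minimal standalone sketch of that control flow, with a stand-in Generator type instead of the real coder classes (all names here are illustrative, not taken from the patch):

  #include <iostream>
  #include <memory>

  struct Generator {
    int GenerateCode() { return 0; }  // stands in for InferenceGenerator / TrainGenerator
  };

  enum CodeMode { Code_Inference, Code_Train, Code_Unknown };

  int Generate(CodeMode mode) {
    std::shared_ptr<Generator> generator;
    switch (mode) {
      case Code_Inference:
      case Code_Train:
        generator = std::make_shared<Generator>();
        break;
      default:
        std::cerr << "unsupported code mode\n";
        break;  // generator stays null here
    }
    if (generator == nullptr) {  // the guard MS_CHECK_PTR provides in session.cc
      return -1;
    }
    return generator->GenerateCode();
  }

  int main() { return Generate(Code_Unknown) == -1 ? 0 : 1; }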