From 142cbaf4cefe71d3404fdf2b28199d6029d17e66 Mon Sep 17 00:00:00 2001 From: hangangqiang Date: Thu, 25 Mar 2021 15:33:59 +0800 Subject: [PATCH] fix fp16 --- mindspore/lite/src/dequant.cc | 188 +++++++++------ mindspore/lite/src/dequant.h | 15 +- mindspore/lite/src/huffman_decode.cc | 28 ++- mindspore/lite/src/huffman_decode.h | 19 +- mindspore/lite/src/kernel_registry.cc | 24 +- mindspore/lite/src/kernel_registry.h | 4 +- mindspore/lite/src/lite_session.cc | 104 ++++---- mindspore/lite/src/lite_session.h | 1 - .../src/runtime/kernel/arm/base/carry_data.cc | 14 +- .../lite/src/runtime/kernel/arm/base/merge.cc | 12 +- .../runtime/kernel/arm/fp16/fp16_op_handler.h | 5 +- .../kernel/arm/fp16/matmul_base_fp16.cc | 11 +- mindspore/lite/src/scheduler.cc | 225 +++++++++++++----- mindspore/lite/src/scheduler.h | 2 + mindspore/lite/src/sub_graph_kernel.cc | 11 +- mindspore/lite/src/sub_graph_kernel.h | 8 +- mindspore/lite/src/tensor.cc | 45 ++-- mindspore/lite/src/tensor.h | 17 +- mindspore/lite/src/tensorlist.cc | 16 +- mindspore/lite/src/tensorlist.h | 3 +- mindspore/lite/test/models_onnx_fp16.cfg | 2 +- 21 files changed, 445 insertions(+), 309 deletions(-) diff --git a/mindspore/lite/src/dequant.cc b/mindspore/lite/src/dequant.cc index 6987c9a45c..71b52a1a12 100644 --- a/mindspore/lite/src/dequant.cc +++ b/mindspore/lite/src/dequant.cc @@ -21,105 +21,135 @@ #include "nnacl/matmul_parameter.h" namespace mindspore::lite { -float *DequantUtil::DequantWeight(lite::Tensor *input_tensor, bool channel_first) { +int DequantUtil::DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type) { MS_ASSERT(input_tensor != nullptr); if (input_tensor->data_type() != kNumberTypeInt8 && input_tensor->data_type() != kNumberTypeInt16) { MS_LOG(ERROR) << "Conv weight input type error." << input_tensor->data_type(); - return nullptr; + return RET_ERROR; } if (input_tensor->quant_params().empty()) { MS_LOG(ERROR) << "No quant param."; - return nullptr; + return RET_ERROR; } - if (input_tensor->data_type() == kNumberTypeInt16) { - return DequantData(input_tensor, channel_first); + if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat32) { + auto new_const_data = DequantData(input_tensor, channel_first); + input_tensor->set_data(new_const_data); + input_tensor->set_own_data(true); + input_tensor->set_data_type(dst_data_type); + } else if (input_tensor->data_type() == kNumberTypeInt16 && dst_data_type == kNumberTypeFloat16) { +#if defined(ENABLE_ARM64) && defined(ENABLE_FP16) + auto new_const_data = DequantData(input_tensor, channel_first); + input_tensor->set_data(new_const_data); + input_tensor->set_own_data(true); + input_tensor->set_data_type(dst_data_type); +#else + MS_LOG(ERROR) << "Float16 is not supported"; + return RET_NOT_SUPPORT; +#endif + } else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat32) { + auto new_const_data = DequantData(input_tensor, channel_first); + input_tensor->set_data(new_const_data); + input_tensor->set_own_data(true); + input_tensor->set_data_type(dst_data_type); + } else if (input_tensor->data_type() == kNumberTypeInt8 && dst_data_type == kNumberTypeFloat16) { +#if defined(ENABLE_ARM64) && defined(ENABLE_FP16) + auto new_const_data = DequantData(input_tensor, channel_first); + input_tensor->set_data(new_const_data); + input_tensor->set_own_data(true); + input_tensor->set_data_type(dst_data_type); +#else + MS_LOG(ERROR) << "Float16 is not supported"; + return RET_NOT_SUPPORT; +#endif } else { - return DequantData(input_tensor, channel_first); + MS_LOG(ERROR) << "Unsupported dequant from data_type(" << (input_tensor->data_type()) << ") to data_type(" + << dst_data_type << ")"; + return RET_NOT_SUPPORT; } + return RET_OK; } -int DequantUtil::UnPackToInt(const schema::Tensor *input_tensor, void *unpack_int_data) { - MS_ASSERT(input_tensor != nullptr); - MS_ASSERT(unpack_int_data != nullptr); - auto quant_params = input_tensor->quantParams(); - if (quant_params == nullptr) { - MS_LOG(ERROR) << "low bits quantparams is empty."; - return RET_ERROR; +int DequantUtil::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { + MS_ASSERT(dst_tensor != nullptr); + if (!dst_tensor->IsConst() || !src_tensor.enableHuffmanCode()) { + return RET_NO_CHANGE; } - auto enable_huffman_code = input_tensor->enableHuffmanCode(); - if (enable_huffman_code) { - std::string encode_str(input_tensor->data()->begin(), input_tensor->data()->end()); - auto huffman_decode = std::make_unique(); - auto ret = huffman_decode->DoHuffmanDecode(encode_str, unpack_int_data); - if (ret != RET_OK) { - MS_LOG(ERROR) << "DoHuffmanDecode failed."; - return ret; - } - return RET_OK; + auto data = reinterpret_cast(src_tensor.data()->data()); + MS_ASSERT(data != nullptr); + std::string encode_str(data, src_tensor.data()->size()); + dst_tensor->set_data(nullptr); + auto ret = dst_tensor->MallocData(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Malloc tensor data failed"; + return RET_NULL_PTR; } - int origin_bit = quant_params->Get(0)->numBits(); - if (origin_bit < 8 && origin_bit > 0) { - UnPackUtil(input_tensor, origin_bit, unpack_int_data); - } else if (origin_bit < 16 && origin_bit > 8) { - UnPackUtil(input_tensor, origin_bit, unpack_int_data); + auto dst_data = dst_tensor->data_c(); + MS_ASSERT(dst_data != nullptr); + ret = HuffmanDecode::DoHuffmanDecode(encode_str, dst_data); + if (ret != RET_OK) { + MS_LOG(ERROR) << "DoHuffmanDecode failed."; + return ret; } return RET_OK; } -std::map> DequantUtil::DequantTensor(OpParameter *op_param, - const std::vector &in_tensors, - TypeId data_type, bool need_restore) { - std::map> tensor_origin_data; - if (data_type == TypeId::kNumberTypeFloat32 || data_type == TypeId::kNumberTypeFloat16) { - auto input_i = 0; - for (auto weight_tensor : in_tensors) { - MS_ASSERT(weight_tensor != nullptr); - input_i++; - auto channel_first = true; - if (op_param->type_ == schema::PrimitiveType_MatMul && weight_tensor->shape().size() == 2) { - auto param = reinterpret_cast(op_param); - if (input_i == 1) { - channel_first = !param->a_transpose_; - } else if (input_i == 2) { - channel_first = param->b_transpose_; - } else { - MS_LOG(WARNING) << "unexpected input_i"; - } - } - - auto *restore_data = weight_tensor->data_c(); - auto restore_type = weight_tensor->data_type(); - bool dequant_flag = !weight_tensor->quant_params().empty() && weight_tensor->quant_params().front().inited && - restore_data != nullptr && - (restore_type == kNumberTypeInt8 || restore_type == kNumberTypeInt16); - if (dequant_flag) { - auto *dequant_weight = DequantUtil::DequantWeight(weight_tensor, channel_first); - if (dequant_weight == nullptr) { - MS_LOG(ERROR) << "dequant data is nullptr."; - return tensor_origin_data; - } - if (need_restore) { - tensor_origin_data[weight_tensor] = {restore_type, restore_data}; - } else { - weight_tensor->FreeData(); - } - weight_tensor->set_data(dequant_weight); - weight_tensor->set_data_type(kNumberTypeFloat32); - } - } +int DequantUtil::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { + MS_ASSERT(dst_tensor != nullptr); + if (!dst_tensor->IsConst()) { + return RET_NO_CHANGE; + } + auto quant_params = src_tensor.quantParams(); + if (quant_params == nullptr || quant_params->size() == 0) { + return RET_NO_CHANGE; + } + auto quant_param = quant_params->Get(0); + if (quant_param == nullptr || !quant_param->inited()) { + return RET_NO_CHANGE; + } + auto dst_data = dst_tensor->data_c(); + if (dst_data != nullptr) { + MS_LOG(ERROR) << "lite Tensor has already malloced data"; + return RET_ERROR; + } + auto ret = dst_tensor->MallocData(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Malloc tensor data failed"; + return RET_NULL_PTR; + } + dst_data = dst_tensor->data_c(); + int origin_bit = quant_param->numBits(); + if (origin_bit < 8 && origin_bit > 0) { + UnPackUtil(&src_tensor, origin_bit, dst_data); + return RET_OK; + } else if (origin_bit < 16 && origin_bit > 8) { + UnPackUtil(&src_tensor, origin_bit, dst_data); + return RET_OK; + } else { + MS_LOG(ERROR) << "Unsupported bit number: " << origin_bit; + return RET_NOT_SUPPORT; } - return tensor_origin_data; } -void DequantUtil::RestoreTensorData(const std::map> &tensor_origin_data_map) { - for (auto &kv : tensor_origin_data_map) { - auto *tensor = kv.first; - auto type_id = kv.second.first; - auto data = kv.second.second; - tensor->FreeData(); - tensor->set_data_type(type_id); - tensor->set_data(data); +Tensor *DequantUtil::DequantTensor(Tensor *tensor, TypeId data_type, bool channel_first, TypeId dst_data_type) { + MS_ASSERT(tensor != nullptr); + Tensor *restore_tensor = nullptr; + if (!tensor->IsConst() || !(data_type == TypeId::kNumberTypeFloat32 || data_type == TypeId::kNumberTypeFloat16)) { + return nullptr; + } + auto restore_type = tensor->data_type(); + bool need_dequant = !tensor->quant_params().empty() && tensor->quant_params().front().inited && + (restore_type == kNumberTypeInt8 || restore_type == kNumberTypeInt16); + if (!need_dequant) { + return nullptr; } + restore_tensor = Tensor::CopyTensor(*tensor, false); + restore_tensor->set_data(tensor->data_c()); + restore_tensor->set_own_data(tensor->own_data()); + auto ret = DequantUtil::DequantWeight(tensor, channel_first, dst_data_type); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Dequant data failed: " << ret; + return nullptr; + } + return restore_tensor; } - } // namespace mindspore::lite diff --git a/mindspore/lite/src/dequant.h b/mindspore/lite/src/dequant.h index f2c7a76655..fac1a86767 100644 --- a/mindspore/lite/src/dequant.h +++ b/mindspore/lite/src/dequant.h @@ -29,19 +29,16 @@ namespace mindspore::lite { class DequantUtil { public: - static float *DequantWeight(lite::Tensor *input_tensor, bool); + static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); - static int UnPackToInt(const schema::Tensor *input_tensor, void *weight_unpack_data); + static int DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); - static std::map> DequantTensor(OpParameter *op_param, - const std::vector &in_tensors, - TypeId data_type, bool need_restore = true); - - static void RestoreTensorData(const std::map> &tensor_origin_data_map); + static Tensor *DequantTensor(Tensor *tensor, TypeId data_type, bool channel_first = true, + TypeId dst_data_type = kNumberTypeFloat32); template static DT *DequantData(lite::Tensor *input_tensor, bool channel_first = true) { - const auto *quant_datas = static_cast(input_tensor->MutableData()); + const auto *quant_datas = static_cast(input_tensor->data_c()); if (quant_datas == nullptr) { MS_LOG(ERROR) << "Get quant tensor failed."; return nullptr; @@ -138,6 +135,8 @@ class DequantUtil { } private: + static int DequantWeight(lite::Tensor *input_tensor, bool channel_first, TypeId dst_data_type = kNumberTypeFloat32); + template static void UnPackData(int origin_bit, const T2 &packed_data, std::queue *unpack_bit_data, void *unpack_int, size_t *count, bool is_last) { diff --git a/mindspore/lite/src/huffman_decode.cc b/mindspore/lite/src/huffman_decode.cc index 8432571f5f..44a0dc33b2 100644 --- a/mindspore/lite/src/huffman_decode.cc +++ b/mindspore/lite/src/huffman_decode.cc @@ -15,10 +15,10 @@ */ #include "src/huffman_decode.h" +#include namespace mindspore { namespace lite { - STATUS HuffmanDecode::DoHuffmanDecode(const std::string &input_str, void *decoded_data) { if (decoded_data == nullptr) { MS_LOG(ERROR) << "decoded_data is nullptr."; @@ -26,8 +26,7 @@ STATUS HuffmanDecode::DoHuffmanDecode(const std::string &input_str, void *decode } int status; - std::string huffman_decoded_str = ""; - + std::string huffman_decoded_str; auto key_pos = input_str.find_first_of('#'); auto code_pos = input_str.find_first_of('#', key_pos + 1); auto key = input_str.substr(0, key_pos); @@ -60,7 +59,7 @@ STATUS HuffmanDecode::DoHuffmanDecode(const std::string &input_str, void *decode size_t len = huffman_decoded_str.length(); memcpy(decoded_data, huffman_decoded_str.c_str(), len); - delete root; + FreeHuffmanNodeTree(root); return RET_OK; } @@ -91,7 +90,6 @@ STATUS HuffmanDecode::RebuildHuffmanTree(std::string keys, std::string codes, co MS_LOG(ERROR) << "new HuffmanNode failed."; return RET_MEMORY_FAILED; } - this->huffman_nodes_.push_back(new_node); new_node->left = nullptr; new_node->right = nullptr; new_node->parent = cur_node; @@ -157,11 +155,23 @@ STATUS HuffmanDecode::DoHuffmanDecompress(HuffmanNodePtr root, std::string encod return RET_OK; } -HuffmanDecode::~HuffmanDecode() { - for (auto &node : this->huffman_nodes_) { - delete node; +void HuffmanDecode::FreeHuffmanNodeTree(HuffmanNodePtr root) { + if (root == nullptr) { + return; + } + std::queue node_queue; + node_queue.push(root); + while (!node_queue.empty()) { + auto cur_node = node_queue.front(); + node_queue.pop(); + if (cur_node->left != nullptr) { + node_queue.push(cur_node->left); + } + if (cur_node->right != nullptr) { + node_queue.push(cur_node->right); + } + delete (cur_node); } - this->huffman_nodes_.resize(0); } } // namespace lite diff --git a/mindspore/lite/src/huffman_decode.h b/mindspore/lite/src/huffman_decode.h index 9f15537082..0495fd06c6 100644 --- a/mindspore/lite/src/huffman_decode.h +++ b/mindspore/lite/src/huffman_decode.h @@ -27,7 +27,6 @@ namespace mindspore { namespace lite { - const int PSEUDO_EOF = 128; struct HuffmanNode { @@ -36,23 +35,25 @@ struct HuffmanNode { std::string code; HuffmanNode *left, *right, *parent; }; + using HuffmanNodePtr = HuffmanNode *; class HuffmanDecode { public: - HuffmanDecode() = default; - - ~HuffmanDecode(); + virtual ~HuffmanDecode() = default; - STATUS DoHuffmanDecode(const std::string &input_str, void *decoded_data); + static STATUS DoHuffmanDecode(const std::string &input_str, void *decoded_data); private: - std::vector huffman_nodes_; - STATUS RebuildHuffmanTree(std::string key, std::string code, const HuffmanNodePtr &root); + HuffmanDecode() = default; + + static void FreeHuffmanNodeTree(HuffmanNodePtr root); + + static STATUS RebuildHuffmanTree(std::string key, std::string code, const HuffmanNodePtr &root); - STATUS DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str); + static STATUS DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str); - std::vector Str2Vec(std::string s) { + static std::vector Str2Vec(std::string s) { size_t i = 0; std::vector vec; while (i < s.length()) { diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc index f383d4ac85..9448c15c54 100644 --- a/mindspore/lite/src/kernel_registry.cc +++ b/mindspore/lite/src/kernel_registry.cc @@ -17,7 +17,6 @@ #include "include/errorcode.h" #include "src/ops/populate/populate_register.h" #include "src/common/version_manager.h" -#include "src/common/prim_util.h" #include "nnacl/pooling_parameter.h" #include "src/reg_kernels.h" #ifdef ENABLE_ARM64 @@ -120,21 +119,24 @@ KernelRegistry::~KernelRegistry() { } } -int KernelRegistry::GetKernel(const std::vector &in_tensors, const std::vector &out_tensors, - const InnerContext *ctx, const kernel::KernelKey &key, OpParameter *parameter, - kernel::LiteKernel **kernel) { +bool KernelRegistry::SupportKernel(const KernelKey &key) { + auto kernel_creator = GetCreator(key); + return kernel_creator != nullptr; +} + +kernel::LiteKernel *KernelRegistry::GetKernel(const std::vector &in_tensors, + const std::vector &out_tensors, const InnerContext *ctx, + const kernel::KernelKey &key, OpParameter *parameter) { MS_ASSERT(ctx != nullptr); - MS_ASSERT(kernel != nullptr); auto creator = GetCreator(key); if (creator != nullptr) { - *kernel = creator(in_tensors, out_tensors, parameter, ctx, key); - if (*kernel != nullptr) { - (*kernel)->set_desc(key); - return RET_OK; + auto kernel = creator(in_tensors, out_tensors, parameter, ctx, key); + if (kernel != nullptr) { + kernel->set_desc(key); + return kernel; } - return RET_ERROR; } - return RET_NOT_SUPPORT; + return nullptr; } #ifdef MS_COMPILE_IOS diff --git a/mindspore/lite/src/kernel_registry.h b/mindspore/lite/src/kernel_registry.h index abbe7801b0..24c224cd67 100644 --- a/mindspore/lite/src/kernel_registry.h +++ b/mindspore/lite/src/kernel_registry.h @@ -37,7 +37,6 @@ class KernelRegistry { static KernelRegistry *GetInstance(); static int Init(); virtual kernel::KernelCreator GetCreator(const kernel::KernelKey &desc); - const kernel::KernelCreator *GetCreatorArrays(); int GetCreatorFuncIndex(kernel::KernelKey desc); void RegKernel(kernel::KernelKey desc, kernel::KernelCreator creator); void RegKernel(kernel::KERNEL_ARCH arch, TypeId data_type, int type, kernel::KernelCreator creator); @@ -45,6 +44,9 @@ class KernelRegistry { int GetKernel(const std::vector &in_tensors, const std::vector &out_tensors, const InnerContext *ctx, const kernel::KernelKey &key, OpParameter *op_parameter, kernel::LiteKernel **kernel); + bool SupportKernel(const kernel::KernelKey &key); + kernel::LiteKernel *GetKernel(const std::vector &in_tensors, const std::vector &out_tensors, + const InnerContext *ctx, const kernel::KernelKey &key, OpParameter *op_parameter); #ifdef MS_COMPILE_IOS void RegisterAllKernels(); #endif diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index b78b97479e..987d1a1385 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -42,20 +42,38 @@ namespace mindspore { namespace lite { -// this method will not check whether tensor_idx is a weight tensor index, caller should ensure this. -static bool WeightTensorNeedCopy(const lite::Model *model, const uint32_t tensor_idx) { -#ifdef SUPPORT_TRAIN - return false; -#endif - - MS_ASSERT(model != nullptr); - auto post_node_idxes = GetLinkedPostNodeIdx(model, tensor_idx); - return std::none_of(post_node_idxes.begin(), post_node_idxes.end(), [&](const size_t &post_node_idx) { - auto node = model->all_nodes_[post_node_idx]; - MS_ASSERT(node != nullptr); - return IsPackedOp(GetPrimitiveType(node->primitive_)); - }); +namespace { +int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { + MS_ASSERT(dst_tensor != nullptr); + bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 && + src_tensor.quantParams()->Get(0) != nullptr && src_tensor.quantParams()->Get(0)->inited(); + if (need_bit_unpack) { + auto num_bits = src_tensor.quantParams()->Get(0)->numBits(); + need_bit_unpack = ((num_bits > 0 && num_bits < 8) || (num_bits > 8 && num_bits < 16)); + } + if (!src_tensor.enableHuffmanCode() && !need_bit_unpack) { + return RET_NO_CHANGE; + } + // huffman code and bit pack are not assumed to be performed at same time + STATUS ret = RET_ERROR; + if (src_tensor.enableHuffmanCode()) { + ret = DequantUtil::DecodeHuffmanCode(src_tensor, dst_tensor); + if (ret != RET_OK && ret != RET_NO_CHANGE) { + MS_LOG(ERROR) << "Decode huffman code failed: " << ret; + return ret; + } + } else if (need_bit_unpack) { + ret = DequantUtil::UnPackToInt(src_tensor, dst_tensor); + if (ret != RET_OK && ret != RET_NO_CHANGE) { + MS_LOG(ERROR) << "Unpack to int8 failed: " << ret; + return ret; + } + } else { + ret = RET_OK; + } + return ret; } +} // namespace LiteSession::LiteSession() { this->is_running_.store(false); } @@ -78,7 +96,6 @@ void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lit dst_tensor->AddQuantParam(quant_arg); } } - dst_tensor->set_enable_huffman_code(src_tensor->enableHuffmanCode()); auto quant_clusters = src_tensor->quantClusters(); if (quant_clusters != nullptr) { std::vector clusters; @@ -93,57 +110,23 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde lite::Tensor *dst_tensor) { MS_ASSERT(src_tensor != nullptr); MS_ASSERT(dst_tensor != nullptr); - auto NeedUnPack = [&src_tensor, &dst_tensor]() -> bool { - auto data_type = src_tensor->dataType(); - int pack_size = src_tensor->data()->size(); - int org_size = dst_tensor->Size(); - return (pack_size != org_size) && (data_type == kNumberTypeInt8 || data_type == kNumberTypeInt16); - }; auto src_category = TensorCategory(src_tensor); if ((src_category == Tensor::Category::CONST_TENSOR || src_category == Tensor::Category::CONST_SCALAR) && src_tensor->data() != nullptr && src_tensor->data()->size() > 0) { if (src_tensor->dataType() == kObjectTypeTensorType) { auto tensor_list = reinterpret_cast(dst_tensor); - if (src_tensor->data() == nullptr) { - MS_LOG(ERROR) << "src_tensor->data() is nullptr"; - return RET_ERROR; - } if (tensor_list->Decode(reinterpret_cast(src_tensor->data()->data())) != RET_OK) { + MS_LOG(ERROR) << "Decode tensorlist data failed"; return RET_ERROR; } } else { - if (WeightTensorNeedCopy(model, tensor_index)) { - auto dst_data = dst_tensor->MutableData(); - if (dst_data == nullptr) { - MS_LOG(ERROR) << "Data from tensor is nullptr"; - return RET_NULL_PTR; - } - if (NeedUnPack()) { - auto ret = DequantUtil::UnPackToInt(src_tensor, dst_data); - if (ret != RET_OK) { - MS_LOG(ERROR) << "unpack to int failed."; - return RET_NULL_PTR; - } - } else { - memcpy(dst_data, src_tensor->data()->data(), dst_tensor->Size()); - } - copyed_tensor_idxes_.emplace_back(tensor_index); - } else { - if (NeedUnPack()) { - auto dst_data = dst_tensor->MutableData(); - if (dst_data == nullptr) { - MS_LOG(ERROR) << "Data from tensor is nullptr"; - return RET_ERROR; - } - auto ret = DequantUtil::UnPackToInt(src_tensor, dst_data); - if (ret != RET_OK) { - MS_LOG(ERROR) << "unpack to int failed."; - return RET_ERROR; - } - copyed_tensor_idxes_.emplace_back(tensor_index); - } else { - dst_tensor->set_data(const_cast(src_tensor->data()->data())); - } + auto ret = DecompressTensor(*src_tensor, dst_tensor); + if (ret == RET_NO_CHANGE) { + dst_tensor->set_data(const_cast(src_tensor->data()->data())); + dst_tensor->set_own_data(false); + } else if (ret != RET_OK) { + MS_LOG(ERROR) << "Decompress tensor data failed: " << ret; + return ret; } } } @@ -176,7 +159,6 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { int LiteSession::ConvertTensors(const lite::Model *model) { MS_ASSERT(model != nullptr); - copyed_tensor_idxes_.clear(); uint32_t tensor_count = model->all_tensors_.size(); MS_ASSERT(!model->sub_graphs_.empty()); auto model_input_indices = model->sub_graphs_.front()->input_indices_; @@ -582,11 +564,11 @@ LiteSession::~LiteSession() { for (auto *kernel : kernels_) { delete kernel; } - for (size_t i = 0; i < tensors_.size(); i++) { - auto *tensor = tensors_.at(i); + for (auto tensor : tensors_) { MS_ASSERT(tensor != nullptr); - // data of weight tensor of node in packed_op can not be to free, we will free weight data when freeing meta_graph - if (tensor->IsConst() && !IsContain(this->inputs_, tensor) && !IsContain(copyed_tensor_idxes_, i)) { + // Data of const tensor which doesn't own data will not freed. + // Such as const data from meta_graph which will be freed when freeing meta_graph. + if (tensor->IsConst() && !tensor->own_data()) { tensor->set_data(nullptr); } delete tensor; diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h index 4baf6dbc7e..1e4375a5f1 100644 --- a/mindspore/lite/src/lite_session.h +++ b/mindspore/lite/src/lite_session.h @@ -115,7 +115,6 @@ class LiteSession : public session::LiteSession { InnerContext *context_ = nullptr; std::vector kernels_; std::vector tensors_; - std::vector copyed_tensor_idxes_; // graph input tensors std::vector inputs_; // graph output tensors diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc index 0121cabe11..31c4e212a7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc @@ -77,14 +77,12 @@ int CarryDataKernel::MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_ } else { dst_tensor->FreeData(); dst_tensor->set_data(src_tensor->data_c()); + dst_tensor->set_own_data(true); src_tensor->set_data(nullptr); + src_tensor->set_own_data(true); } } else { - auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor()); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed"; - return ret; - } + dst_tensor->set_root_tensor(src_tensor->root_tensor()); } return RET_OK; } @@ -121,11 +119,7 @@ int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensor, lite::Tens src_tensor->set_tensors({}); } else { dst_tensor->set_shape(src_tensor->shape()); - auto ret = dst_tensor->set_root_tensor(src_tensor->root_tensor()); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Set root tensor for tensor(" << dst_tensor->tensor_name() << ") failed"; - return ret; - } + dst_tensor->set_root_tensor(src_tensor->root_tensor()); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc index 6034403319..6176660ec1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/merge.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/merge.cc @@ -63,16 +63,8 @@ int MergeCPUKernel::Init() { MS_ASSERT(in_tensors_[i] != nullptr); MS_ASSERT(in_tensors_[i + stride] != nullptr); if (in_tensors_[i] == in_tensors_[i + stride]) { - auto ret = in_tensors_[i]->set_root_tensor(in_tensors_[i]); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Set root tensor for tensor(" << in_tensors_[i]->tensor_name() << ") failed"; - return ret; - } - ret = in_tensors_[i + stride]->set_root_tensor(in_tensors_[i + stride]); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Set root tensor for tensor(" << in_tensors_[i + stride]->tensor_name() << ") failed"; - return ret; - } + in_tensors_[i]->set_root_tensor(in_tensors_[i]); + in_tensors_[i + stride]->set_root_tensor(in_tensors_[i + stride]); } } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fp16_op_handler.h b/mindspore/lite/src/runtime/kernel/arm/fp16/fp16_op_handler.h index 636969c4f5..86db28de40 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fp16_op_handler.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fp16_op_handler.h @@ -25,10 +25,11 @@ extern "C" { extern void Float32ToFloat16(const float *input, float16_t *output, int number); extern void Float16ToFloat32(const float16_t *input, float *output, int number); -void Float32ToFloat16_fp16_handler(const void *input, void *output, int number) { +inline void Float32ToFloat16_fp16_handler(const void *input, void *output, int number) { Float32ToFloat16(reinterpret_cast(input), reinterpret_cast(output), number); } -void Float16ToFloat32_fp16_handler(const void *input, void *output, int number) { + +inline void Float16ToFloat32_fp16_handler(const void *input, void *output, int number) { Float16ToFloat32(reinterpret_cast(input), reinterpret_cast(output), number); } #endif diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc index 89a432d17b..beaaf0d22d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc @@ -71,13 +71,20 @@ int MatmulBaseFP16CPUKernel::InitBias() { if (in_tensors_.size() == 3) { auto bias_tensor = in_tensors_[2]; int max_bias_data = UP_ROUND(bias_tensor->ElementsNum(), C8NUM); - bias_ptr_ = reinterpret_cast(malloc(max_bias_data * sizeof(float))); + bias_ptr_ = reinterpret_cast(malloc(max_bias_data * sizeof(float16_t))); if (bias_ptr_ == nullptr) { MS_LOG(ERROR) << "malloc bias_ptr_ failed"; return RET_ERROR; } memset(bias_ptr_, 0, max_bias_data * sizeof(float16_t)); - Float32ToFloat16(reinterpret_cast(in_tensors_[2]->data_c()), bias_ptr_, bias_tensor->ElementsNum()); + if (in_tensors_[2]->data_type() == kNumberTypeFloat32) { + Float32ToFloat16(reinterpret_cast(in_tensors_[2]->data_c()), bias_ptr_, bias_tensor->ElementsNum()); + } else if (in_tensors_[2]->data_type() == kNumberTypeFloat16) { + memcpy(bias_ptr_, in_tensors_[2]->data_c(), max_bias_data * sizeof(float16_t)); + } else { + MS_LOG(ERROR) << "Unsupported bias data type : " << in_tensors_[2]->data_type(); + return RET_ERROR; + } } return RET_OK; } diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index ece45d4f97..616acbb3e7 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -31,6 +31,7 @@ #include "src/common/prim_util.h" #include "src/runtime/infer_manager.h" #include "src/dequant.h" +#include "nnacl/matmul_parameter.h" #if GPU_OPENCL #include "src/runtime/kernel/opencl/opencl_subgraph.h" #include "src/runtime/gpu/opencl/opencl_runtime.h" @@ -43,6 +44,10 @@ #include "src/runtime/agent/npu/optimizer/npu_fusion_pass.h" #include "src/runtime/agent/npu/optimizer/npu_insert_transform_pass.h" #endif +#if defined(ENABLE_ARM64) && defined(ENABLE_FP16) +#include "src/runtime/kernel/arm/fp16/fp16_op_handler.h" +#endif + namespace mindspore::lite { using kernel::KERNEL_ARCH::kCPU; using kernel::KERNEL_ARCH::kGPU; @@ -198,46 +203,168 @@ int Scheduler::InferSubGraphShape(size_t subgraph_index, bool *infer_shape_inter return RET_OK; } +namespace { +#ifndef SUPPORT_TRAIN +int CopyConstTensor(Tensor *tensor, std::map *restored_origin_tensors, TypeId dst_data_type) { + MS_ASSERT(restored_origin_tensors != nullptr); + MS_ASSERT(tensor != nullptr); + if (dst_data_type != kNumberTypeFloat32 && dst_data_type != kNumberTypeFloat16) { + MS_LOG(ERROR) << "Only support fp32 or fp16 as dst_data_type."; + return RET_PARAM_INVALID; + } + // tensorlist not support fp16 now + if (!tensor->IsConst() || tensor->data_type() == kObjectTypeTensorType) { + return RET_OK; + } + auto origin_data = tensor->data_c(); + MS_ASSERT(origin_data != nullptr); + if (tensor->data_type() == kNumberTypeFloat32 && dst_data_type == kNumberTypeFloat16) { +#if defined(ENABLE_ARM64) && defined(ENABLE_FP16) + auto restore_tensor = Tensor::CopyTensor(*tensor, false); + restore_tensor->set_data(origin_data); + restore_tensor->set_own_data(tensor->own_data()); + tensor->set_data(nullptr); + tensor->set_data_type(kNumberTypeFloat16); + auto ret = tensor->MallocData(); + if (RET_OK != ret) { + MS_LOG(ERROR) << "malloc data failed"; + return ret; + } + auto new_tensor_data = tensor->data_c(); + MS_ASSERT(new_tensor_data != nullptr); + Float32ToFloat16_fp16_handler(origin_data, new_tensor_data, tensor->ElementsNum()); + (*restored_origin_tensors)[tensor] = restore_tensor; +#else + MS_LOG(ERROR) << "Unsupported dst data type: float16"; + return RET_ERROR; +#endif + } else { + tensor->set_data(nullptr); + auto ret = tensor->MallocData(); + if (RET_OK != ret) { + MS_LOG(ERROR) << "malloc data failed"; + return ret; + } + auto new_data = tensor->data_c(); + MS_ASSERT(new_data != nullptr); + memcpy(new_data, origin_data, tensor->Size()); + } + return RET_OK; +} +#endif + +inline void RestoreTensorData(const std::map &restored_origin_tensors) { + for (auto &restored_origin_tensor : restored_origin_tensors) { + auto *origin_tensor = restored_origin_tensor.first; + auto *restored_tensor = restored_origin_tensor.second; + MS_ASSERT(origin_tensor != nullptr); + MS_ASSERT(restored_tensor != nullptr); + origin_tensor->FreeData(); + origin_tensor->set_data_type(restored_tensor->data_type()); + origin_tensor->set_data(restored_tensor->data_c()); + origin_tensor->set_own_data(restored_tensor->own_data()); + } +} + +inline void FreeRestoreTensors(std::map *restored_origin_tensors) { + MS_ASSERT(restored_origin_tensors != nullptr); + for (auto &restored_origin_tensor : *restored_origin_tensors) { + restored_origin_tensor.second->set_data(nullptr); + delete (restored_origin_tensor.second); + } + restored_origin_tensors->clear(); +} + +inline bool IsChannelFirst(const std::vector &in_tensors, OpParameter *op_parameter) { + MS_ASSERT(op_parameter != nullptr); + if (op_parameter->type_ == schema::PrimitiveType_MatMul) { + for (size_t i = 0; i < in_tensors.size(); i++) { + auto tensor = in_tensors.at(i); + MS_ASSERT(tensor != nullptr); + if (tensor->shape().size() != 2) { + continue; + } + const auto *param = reinterpret_cast(op_parameter); + if (i == 1) { + return !(param->a_transpose_); + } else if (i == 2) { + return param->b_transpose_; + } else { + // not care bias data + } + } + } + return true; +} +} // namespace + +kernel::LiteKernel *Scheduler::FindCpuKernel(const std::vector &in_tensors, + const std::vector &out_tensors, OpParameter *op_parameter, + const kernel::KernelKey &desc, TypeId kernel_data_type) { + MS_ASSERT(op_parameter != nullptr); + auto op_type = op_parameter->type_; + if (!KernelRegistry::GetInstance()->SupportKernel(desc)) { + return nullptr; + } + std::map restored_origin_tensors; + for (auto &tensor : in_tensors) { + auto channel_first = IsChannelFirst(in_tensors, op_parameter); + auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kernel_data_type); + if (restore_tensor != nullptr) { + restored_origin_tensors[tensor] = restore_tensor; + } else { +#ifndef SUPPORT_TRAIN + if (!IsPackedOp(op_type) && !tensor->own_data()) { // && op_type != schema::PrimitiveType_LSTM + auto ret = CopyConstTensor(tensor, &restored_origin_tensors, kernel_data_type); + if (ret != RET_OK) { + MS_LOG(DEBUG) << "CopyConstTensor failed: " << ret; + return nullptr; + } + } +#endif + } + } + auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, desc, op_parameter); + if (kernel != nullptr) { + MS_LOG(DEBUG) << "Get TypeId(" << kernel_data_type << ") op success: " << PrimitiveTypeName(op_type); + FreeRestoreTensors(&restored_origin_tensors); + } else { + RestoreTensorData(restored_origin_tensors); + } + return kernel; +} + kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, TypeId prefer_data_type) { - kernel::LiteKernel *kernel = nullptr; - TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors); + MS_ASSERT(node != nullptr); + bool need_dequant = node->quant_type_ == schema::QuantType_WeightQuant; + TypeId data_type = need_dequant ? kNumberTypeFloat32 : GetFirstFp32Fp16OrInt8Type(in_tensors); OpParameter *op_parameter = op_parameters_[node->output_indices_.at(0)]; if (op_parameter == nullptr) { MS_LOG(ERROR) << "Can not find OpParameter!type: " << PrimitiveTypeName(GetPrimitiveType(node->primitive_)); return nullptr; } bool infer_shape_interrupt = !op_parameter->infer_flag_; - bool need_restore = true; - if (node->quant_type_ == schema::QuantType_WeightQuant) { - data_type = kNumberTypeFloat32; - } - if (!IsPackedOp(op_parameter->type_)) { - need_restore = false; - } kernel::KernelKey desc{kCPU, data_type, static_cast(op_parameter->type_)}; #if SUPPORT_GPU if (context_->IsGpuEnabled()) { // support more data type like int32 kernel::KernelKey gpu_desc{kGPU, kNumberTypeFloat32, desc.type}; if (context_->IsGpuFloat16Enabled()) gpu_desc.data_type = kNumberTypeFloat16; - auto ret = - KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, gpu_desc, op_parameter, &kernel); - if (ret == RET_OK) { + auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, gpu_desc, op_parameter); + if (kernel != nullptr) { MS_LOG(DEBUG) << "Get gpu op success: " << PrimitiveCurVersionTypeName(gpu_desc.type) << " " << node->name_; return kernel; } else { MS_LOG(DEBUG) << "Get gpu op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(gpu_desc.type) << " " << node->name_; - if (ret == RET_ERROR) { - ret = InferNodeShape(node, &infer_shape_interrupt); - if (ret == RET_INFER_INVALID || ret == RET_OK) { - op_parameter = op_parameters_[node->output_indices_.at(0)]; - } else { - MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; - return nullptr; - } + auto ret = InferNodeShape(node, &infer_shape_interrupt); + if (ret == RET_INFER_INVALID || ret == RET_OK) { + op_parameter = op_parameters_[node->output_indices_.at(0)]; + } else { + MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; + return nullptr; } } } @@ -253,22 +380,19 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in } } kernel::KernelKey npu_desc{kNPU, desc.data_type, desc.type}; - auto ret = - KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, npu_desc, op_parameter, &kernel); - if (ret == RET_OK) { + auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, npu_desc, op_parameter); + if (kernel != nullptr) { MS_LOG(DEBUG) << "Get npu op success: " << PrimitiveCurVersionTypeName(npu_desc.type) << " " << node->name_; return kernel; } else { MS_LOG(DEBUG) << "Get npu op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(npu_desc.type) << " " << node->name_; - if (ret == RET_ERROR) { - ret = InferNodeShape(node, &infer_shape_interrupt); - if (ret == RET_INFER_INVALID || ret == RET_OK) { - op_parameter = op_parameters_[node->output_indices_.at(0)]; - } else { - MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; - return nullptr; - } + auto ret = InferNodeShape(node, &infer_shape_interrupt); + if (ret == RET_INFER_INVALID || ret == RET_OK) { + op_parameter = op_parameters_[node->output_indices_.at(0)]; + } else { + MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; + return nullptr; } } } @@ -277,25 +401,18 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in mindspore::lite::IsSupportFloat16() && ((context_->IsCpuFloat16Enabled() && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16)) { kernel::KernelKey fp16_cpu_desc{desc.arch, kNumberTypeFloat16, desc.type}; - auto tensor_origin_data_map = - DequantUtil::DequantTensor(op_parameter, in_tensors, fp16_cpu_desc.data_type, need_restore); - auto ret = - KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, fp16_cpu_desc, op_parameter, &kernel); - DequantUtil::RestoreTensorData(tensor_origin_data_map); - if (ret == RET_OK) { - MS_LOG(DEBUG) << "Get fp16 op success: " << PrimitiveCurVersionTypeName(fp16_cpu_desc.type) << " " << node->name_; + auto kernel = FindCpuKernel(in_tensors, out_tensors, op_parameter, fp16_cpu_desc, kNumberTypeFloat16); + if (kernel != nullptr) { return kernel; } else { MS_LOG(DEBUG) << "Get fp16 op failed, scheduler to cpu: " << PrimitiveCurVersionTypeName(fp16_cpu_desc.type) << " " << node->name_; - if (ret == RET_ERROR) { - ret = InferNodeShape(node, &infer_shape_interrupt); - if (ret == RET_INFER_INVALID || ret == RET_OK) { - op_parameter = op_parameters_[node->output_indices_.at(0)]; - } else { - MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; - return nullptr; - } + auto ret = InferNodeShape(node, &infer_shape_interrupt); + if (ret == RET_INFER_INVALID || ret == RET_OK) { + op_parameter = op_parameters_[node->output_indices_.at(0)]; + } else { + MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; + return nullptr; } } } @@ -304,20 +421,20 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector &in desc.data_type = kNumberTypeFloat32; } if (prefer_data_type == kNumberTypeFloat32 || prefer_data_type == kTypeUnknown) { - auto tensor_origin_data_map = DequantUtil::DequantTensor(op_parameter, in_tensors, desc.data_type, need_restore); - auto ret = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, context_, desc, op_parameter, &kernel); - DequantUtil::RestoreTensorData(tensor_origin_data_map); - if (ret == RET_OK) { + auto kernel = FindCpuKernel(in_tensors, out_tensors, op_parameter, desc, kNumberTypeFloat32); + if (kernel != nullptr) { return kernel; - } else if (ret == RET_ERROR) { - ret = InferNodeShape(node, &infer_shape_interrupt); + } else { + auto ret = InferNodeShape(node, &infer_shape_interrupt); if (!(ret == RET_INFER_INVALID || ret == RET_OK)) { - MS_LOG(ERROR) << "Try repeat infer fail: " << node->name_; + MS_LOG(ERROR) + + << "Try repeat infer fail: " << node->name_; } } } return nullptr; -} +} // namespace mindspore::lite kernel::LiteKernel *Scheduler::SchedulePartialToKernel(const lite::Model::Node *src_node) { MS_ASSERT(src_model_ != nullptr); diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h index 19c9c43c7a..f4fe469520 100644 --- a/mindspore/lite/src/scheduler.h +++ b/mindspore/lite/src/scheduler.h @@ -59,6 +59,8 @@ class Scheduler { kernel::LiteKernel *FindBackendKernel(const std::vector &in_tensors, const std::vector &out_tensors, const Model::Node *node, TypeId prefer_data_type = kTypeUnknown); + kernel::LiteKernel *FindCpuKernel(const std::vector &in_tensors, const std::vector &out_tensors, + OpParameter *op_parameter, const kernel::KernelKey &desc, TypeId kernel_data_type); // schedule a partial node to a subgraph_kernel kernel::LiteKernel *SchedulePartialToKernel(const lite::Model::Node *src_node); // schedule a node to a kernel diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc index 38ee910985..2d44d94cfe 100644 --- a/mindspore/lite/src/sub_graph_kernel.cc +++ b/mindspore/lite/src/sub_graph_kernel.cc @@ -205,7 +205,9 @@ void CpuFp16SubGraph::FreeOriginInputData() { } int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) { + MS_ASSERT(tensor != nullptr); auto float32_data = tensor->data_c(); + auto own_data = tensor->own_data(); if (float32_data == nullptr) { MS_LOG(ERROR) << "tensor data is null."; return lite::RET_NULL_PTR; @@ -215,15 +217,14 @@ int CpuFp16SubGraph::Float32TensorToFloat16Tensor(lite::Tensor *tensor) { auto ret = tensor->MallocData(); if (RET_OK != ret) { MS_LOG(ERROR) << "malloc data failed"; - this->FreeOriginInputData(); return RET_ERROR; } MS_ASSERT(tensor->data_c() != nullptr); Float32ToFloat16_fp16_handler(float32_data, tensor->data_c(), tensor->ElementsNum()); - auto *data_store = DataStore::CreateDataStore(float32_data, tensor->allocator(), this->context_->allocator.get()); + auto *data_store = + DataStore::CreateDataStore(float32_data, own_data, tensor->allocator(), this->context_->allocator.get()); if (data_store == nullptr) { MS_LOG(ERROR) << "Create DataStore failed"; - this->FreeOriginInputData(); return RET_ERROR; } origin_input_data_[tensor] = data_store; @@ -283,6 +284,7 @@ int CpuFp16SubGraph::PreProcess() { ret = Float32TensorToFloat16Tensor(real_tensor); if (RET_OK != ret) { MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed."; + this->FreeOriginInputData(); return ret; } } else if (real_tensor->data_type() == kObjectTypeTensorType) { @@ -293,6 +295,7 @@ int CpuFp16SubGraph::PreProcess() { ret = Float32TensorToFloat16Tensor(inner_tensor); if (RET_OK != ret) { MS_LOG(ERROR) << "Float32TensorToFloat16Tensor failed."; + this->FreeOriginInputData(); return ret; } } @@ -372,6 +375,7 @@ int CpuFp16SubGraph::PostProcess() { real_tensor->FreeData(); MS_ASSERT(origin_tensor_data->data_ != nullptr); real_tensor->set_data(origin_tensor_data->data_); + real_tensor->set_own_data(origin_tensor_data->own_data_); real_tensor->set_data_type(kNumberTypeFloat32); origin_tensor_data->data_ = nullptr; tensor_count++; @@ -385,6 +389,7 @@ int CpuFp16SubGraph::PostProcess() { inner_tensor->FreeData(); MS_ASSERT(origin_tensor_data->data_ != nullptr); inner_tensor->set_data(origin_tensor_data->data_); + inner_tensor->set_own_data(origin_tensor_data->own_data_); inner_tensor->set_data_type(kNumberTypeFloat32); origin_tensor_data->data_ = nullptr; tensor_count++; diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h index 7f7ac44fc4..a4aa8588a2 100644 --- a/mindspore/lite/src/sub_graph_kernel.h +++ b/mindspore/lite/src/sub_graph_kernel.h @@ -33,9 +33,10 @@ namespace mindspore::kernel { // store origin data and allocator of input tensor of subgraph for PreProcess and PostProcess struct DataStore { void *data_ = nullptr; - mindspore::Allocator *allocator_ = nullptr; - static DataStore *CreateDataStore(void *data = nullptr, mindspore::Allocator *data_allocator = nullptr, - mindspore::Allocator *allocator = nullptr) { + Allocator *allocator_ = nullptr; + bool own_data_ = true; + static DataStore *CreateDataStore(void *data = nullptr, bool own_data = true, Allocator *data_allocator = nullptr, + Allocator *allocator = nullptr) { DataStore *data_store = nullptr; if (allocator == nullptr) { data_store = static_cast(malloc(sizeof(DataStore))); @@ -47,6 +48,7 @@ struct DataStore { return nullptr; } data_store->data_ = data; + data_store->own_data_ = own_data; data_store->allocator_ = data_allocator; return data_store; } diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc index 1488699d27..f15b402c11 100644 --- a/mindspore/lite/src/tensor.cc +++ b/mindspore/lite/src/tensor.cc @@ -25,7 +25,7 @@ namespace mindspore { namespace lite { -#define kMaxMallocSize 1024 * 1024 * 100 +#define kMaxMallocSize 1024 * 1024 * 300 Tensor::Tensor(const TypeId data_type, std::vector shape, const schema::Format &format, Category category) : data_type_(data_type), shape_(std::move(shape)), format_(format), category_(category) {} @@ -43,16 +43,9 @@ int Tensor::CopyTensorData(const Tensor &src_tensor, Tensor *dst_tensor) { MS_LOG(ERROR) << "Size of dst tensor is not compatible with src tensor"; return RET_ERROR; } - if (dst_tensor->data_ == nullptr) { - if (data_size > kMaxMallocSize) { - MS_LOG(ERROR) << "Malloc size is too big while coping data, " << data_size << " bytes"; - return RET_ERROR; - } - dst_tensor->data_ = malloc(data_size); - if (dst_tensor->data_ == nullptr) { - MS_LOG(ERROR) << "Malloc memory failed"; - return RET_ERROR; - } + if (dst_tensor->MallocData() != RET_OK) { + MS_LOG(ERROR) << "Malloc memory failed"; + return RET_ERROR; } memcpy(dst_tensor->data_, src_tensor.data_, data_size); return RET_OK; @@ -74,12 +67,13 @@ Tensor *Tensor::CopyTensor(const Tensor &src_tensor, bool copy_data) { MS_LOG(ERROR) << "CopyTensorData error"; return nullptr; } + result->own_data_ = src_tensor.own_data_; } return result; } Tensor::~Tensor() { - if (nullptr != this->data_) { + if (nullptr != this->data_ && this->own_data_) { if (this->allocator_ != nullptr) { this->allocator_->Free(this->data_); } else { @@ -276,13 +270,13 @@ std::string Tensor::ToString() const { return oss.str(); } -int Tensor::set_root_tensor(Tensor *tensor) { +void Tensor::set_root_tensor(Tensor *tensor) { this->root_tensor_ = tensor; if (this->root_tensor_ == this) { - return RET_OK; + return; } if (this->root_tensor_ == nullptr) { - return RET_OK; + return; } this->shape_ = this->root_tensor_->shape_; this->format_ = this->root_tensor_->format_; @@ -290,7 +284,6 @@ int Tensor::set_root_tensor(Tensor *tensor) { this->category_ = this->root_tensor_->category_; this->quant_params_ = this->root_tensor_->quant_params_; this->quant_clusters_ = this->root_tensor_->quant_clusters_; - return RET_OK; } int Tensor::MallocData(const mindspore::Allocator *allocator) { @@ -300,16 +293,21 @@ int Tensor::MallocData(const mindspore::Allocator *allocator) { if (allocator != nullptr) { allocator_ = const_cast(allocator); } + auto data_size = this->Size(); + if (data_size > kMaxMallocSize) { + MS_LOG(ERROR) << "Malloc size is too big while coping data, " << data_size << " bytes"; + return RET_ERROR; + } if (allocator_ == nullptr) { - this->data_ = malloc(this->Size()); + this->data_ = malloc(data_size); } else { - this->data_ = allocator_->Malloc(this->Size()); + this->data_ = allocator_->Malloc(data_size); } if (nullptr == this->data_) { - MS_LOG(ERROR) << "Malloc tensor data failed, size=" << this->Size(); + MS_LOG(ERROR) << "Malloc tensor data failed, size=" << data_size; return RET_ERROR; } - + this->own_data_ = true; return RET_OK; } @@ -317,6 +315,9 @@ void Tensor::FreeData() { if (nullptr == this->data_) { return; } + if (!this->own_data_) { + return; + } if (nullptr == allocator_) { free(this->data_); this->data_ = nullptr; @@ -366,10 +367,6 @@ std::vector Tensor::quant_clusters() const { return this->quant_clusters_ void Tensor::set_quant_clusters(const std::vector &clusters) { this->quant_clusters_ = clusters; } -bool Tensor::enable_huffman_code() const { return enable_huffman_code_; } - -void Tensor::set_enable_huffman_code(bool enable_huffman_code) { this->enable_huffman_code_ = enable_huffman_code; } - std::vector TensorVectorCast(const std::vector &src) { std::vector target(src.size()); std::transform(src.begin(), src.end(), target.begin(), [](Tensor *t) { return dynamic_cast(t); }); diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index 67f700123b..4929aa69d2 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -121,7 +121,10 @@ class Tensor : public mindspore::tensor::MSTensor { return data_; } - void set_data(void *data) override { this->data_ = data; } + void set_data(void *data) override { + this->data_ = data; + this->own_data_ = true; + } Category category() const { return this->category_; } @@ -153,10 +156,6 @@ class Tensor : public mindspore::tensor::MSTensor { void set_quant_clusters(const std::vector &clusters); - bool enable_huffman_code() const; - - void set_enable_huffman_code(bool enable_huffman_code); - virtual bool IsConst() const { return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; } @@ -173,7 +172,7 @@ class Tensor : public mindspore::tensor::MSTensor { } } - virtual int set_root_tensor(Tensor *tensor); + virtual void set_root_tensor(Tensor *tensor); Tensor *root_tensor() const { return this->root_tensor_; } @@ -181,6 +180,10 @@ class Tensor : public mindspore::tensor::MSTensor { return this->IsConst() || (this->IsGraphInput() && this->data_ != nullptr) || this->ref_count_ >= 1; } + bool own_data() const { return this->own_data_; } + + void set_own_data(bool own_data) { this->own_data_ = own_data; } + private: template std::string DataToString(void *data, size_t data_number) const { @@ -208,7 +211,7 @@ class Tensor : public mindspore::tensor::MSTensor { std::vector quant_clusters_; mindspore::Allocator *allocator_ = nullptr; Tensor *root_tensor_ = nullptr; - bool enable_huffman_code_ = false; + bool own_data_{false}; }; inline size_t DataTypeSize(const TypeId type) { diff --git a/mindspore/lite/src/tensorlist.cc b/mindspore/lite/src/tensorlist.cc index 531fc128a7..cf434d398c 100644 --- a/mindspore/lite/src/tensorlist.cc +++ b/mindspore/lite/src/tensorlist.cc @@ -199,23 +199,15 @@ int TensorList::CheckTensorListParam() { return RET_OK; } -int TensorList::set_root_tensor(Tensor *tensor) { - auto ret = Tensor::set_root_tensor(tensor); - if (ret != RET_OK) { - return ret; - } - if (this->data_type_ != kObjectTypeTensorType) { - return RET_OK; +void TensorList::set_root_tensor(Tensor *tensor) { + Tensor::set_root_tensor(tensor); + if (this->data_type_ != kObjectTypeTensorType || tensor == nullptr) { + return; } auto root_tensorlist = reinterpret_cast(this->root_tensor_); - if (root_tensorlist == nullptr) { - MS_LOG(ERROR) << "root_tensor of tensorlist should be a tensorlist"; - return RET_INFER_INVALID; - } this->element_shape_ = root_tensorlist->element_shape_; this->max_elements_num_ = root_tensorlist->max_elements_num_; this->tensors_data_type_ = root_tensorlist->tensors_data_type_; - return RET_OK; } Tensor *TensorList::GetTensor(int index) { diff --git a/mindspore/lite/src/tensorlist.h b/mindspore/lite/src/tensorlist.h index 1fae3348aa..24ad385dad 100644 --- a/mindspore/lite/src/tensorlist.h +++ b/mindspore/lite/src/tensorlist.h @@ -109,11 +109,10 @@ class TensorList : public Tensor { bool IsConst() const override; - int set_root_tensor(Tensor *tensor) override; + void set_root_tensor(Tensor *tensor) override; protected: // The following functions must be masked. - void set_data(void *data) override {} void *data_c() const override { return nullptr; } void *MutableData() override { return nullptr; } size_t Size() const override { return 0; } diff --git a/mindspore/lite/test/models_onnx_fp16.cfg b/mindspore/lite/test/models_onnx_fp16.cfg index eedfd269c9..64da7aa5d5 100644 --- a/mindspore/lite/test/models_onnx_fp16.cfg +++ b/mindspore/lite/test/models_onnx_fp16.cfg @@ -37,7 +37,7 @@ adversarial_pruning.onnx 3 residual_distill_res34_cifar10_bs_1_update.onnx 2 residual_distill_res50_cifar10_bs_1_update.onnx 2 #ml_voice_detect.onnx #out of float16 range because power op -hdc_ocr_attention.onnx 1 +hdc_ocr_attention.onnx 1.6 hdc_ocr_detect.onnx 30 #one of the output has small values ml_edu_kit_hand_detection.onnx 2 ml_edu_kit_hand_key_position.onnx 2