!11510 [MS][LITE] weight quant support huffman code

From: @jianghui58
Reviewed-by: 
Signed-off-by:
pull/11510/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 86dfebd837

@ -57,6 +57,7 @@ table Tensor {
quantParams: [QuantParam];
quantClusters: [float];
name: string;
enableHuffmanCode: bool = false;
}
union PrimitiveType {

@ -37,6 +37,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
${CMAKE_CURRENT_SOURCE_DIR}/dequant.cc
${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc
)
if(SUPPORT_GPU)

@ -0,0 +1,168 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/huffman_decode.h"
namespace mindspore {
namespace lite {
STATUS huffman_decode::DoHuffmanDecode(const std::string &input_str, void *decoded_data) {
if (decoded_data == nullptr) {
MS_LOG(ERROR) << "decoded_data is nullptr.";
return RET_ERROR;
}
int status;
std::string huffman_decoded_str = "";
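// expected layout of input_str: "<keys>#<codes>#<payload>" - two '#'-separated tables, then the packed bit stream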
auto key_pos = input_str.find_first_of('#');
auto code_pos = input_str.find_first_of('#', key_pos + 1);
if (key_pos == std::string::npos || code_pos == std::string::npos) {
MS_LOG(ERROR) << "input_str is not a valid huffman-coded blob.";
return RET_ERROR;
}
auto key = input_str.substr(0, key_pos);
auto code = input_str.substr(key_pos + 1, code_pos - key_pos - 1);
auto encoded_data = input_str.substr(code_pos + 1);
auto root = new (std::nothrow) HuffmanNode();
if (root == nullptr) {
MS_LOG(ERROR) << "new HuffmanNode failed.";
return RET_MEMORY_FAILED;
}
root->left = nullptr;
root->right = nullptr;
root->parent = nullptr;
status = RebuildHuffmanTree(key, code, root);
if (status != RET_OK) {
MS_LOG(ERROR) << "Rebuild huffman tree failed.";
delete root;
return status;
}
status = DoHuffmanDecompress(root, encoded_data, &huffman_decoded_str);
if (status != RET_OK) {
MS_LOG(ERROR) << "DoHuffmanDecompress failed.";
delete root;
return status;
}
size_t len = huffman_decoded_str.length();
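// NOTE: the caller must guarantee that decoded_data has room for at least len bytes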
memcpy(decoded_data, huffman_decoded_str.c_str(), len);
delete root;
return RET_OK;
}
STATUS huffman_decode::RebuildHuffmanTree(std::string keys, std::string codes, const HuffmanNodePtr &root) {
HuffmanNodePtr cur_node, tmp_node, new_node;
auto huffman_keys = Str2Vec(std::move(keys));
auto huffman_codes = Str2Vec(std::move(codes));
for (size_t i = 0; i < huffman_codes.size(); ++i) {
auto key = stoi(huffman_keys[i]);
auto code = huffman_codes[i];
auto code_len = code.length();
cur_node = root;
for (size_t j = 0; j < code_len; ++j) {
if (code[j] == '0') {
tmp_node = cur_node->left;
} else if (code[j] == '1') {
tmp_node = cur_node->right;
} else {
MS_LOG(ERROR) << "find huffman code is not 0 or 1";
return RET_ERROR;
}
if (tmp_node == nullptr) {
new_node = new (std::nothrow) HuffmanNode();
if (new_node == nullptr) {
MS_LOG(ERROR) << "new HuffmanNode failed.";
return RET_MEMORY_FAILED;
}
this->huffman_nodes_.push_back(new_node);
new_node->left = nullptr;
new_node->right = nullptr;
new_node->parent = cur_node;
if (j == code_len - 1) {
new_node->key = key;
new_node->code = code;
}
if (code[j] == '0') {
cur_node->left = new_node;
} else {
cur_node->right = new_node;
}
tmp_node = new_node;
} else if (j == code_len - 1) {
MS_LOG(ERROR) << "the huffman code is incomplete.";
return RET_ERROR;
} else if (tmp_node->left == nullptr && tmp_node->right == nullptr) {
MS_LOG(ERROR) << "the huffman code is incomplete";
return RET_ERROR;
}
cur_node = tmp_node;
}
}
return RET_OK;
}
STATUS huffman_decode::DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str) {
HuffmanNodePtr cur_node = root;
bool pseudo_eof = false;
size_t pos = 0;
unsigned char flag;
decoded_str->clear();
while (pos < encoded_data.length()) {
auto u_char = static_cast<unsigned char>(encoded_data[pos]);
flag = 0x80;
for (size_t i = 0; i < 8; ++i) { // traverse the 8 bit num, to find the leaf node
if (u_char & flag) {
cur_node = cur_node->right;
} else {
cur_node = cur_node->left;
}
if (cur_node == nullptr) {
MS_LOG(ERROR) << "the encoded bit stream does not match the huffman tree.";
return RET_ERROR;
}
if (cur_node->left == nullptr && cur_node->right == nullptr) {
auto key = cur_node->key;
if (key == PSEUDO_EOF) {
pseudo_eof = true;
break;
} else {
*decoded_str += static_cast<char>(cur_node->key);
cur_node = root;
}
}
flag = flag >> 1;
}
pos++;
if (pseudo_eof) {
break;
}
}
return RET_OK;
}
huffman_decode::~huffman_decode() {
for (auto &node : this->huffman_nodes_) {
delete node;
}
this->huffman_nodes_.clear();
}
} // namespace lite
} // namespace mindspore
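For orientation, DoHuffmanDecode above consumes a blob of the form `<keys>#<codes>#<payload>`: two space-separated tables followed by the code bits packed MSB-first and terminated by the pseudo-EOF symbol (key 128). A minimal hand-built round trip; the blob values and the Demo helper are illustrative sketches, not part of this patch:

#include <string>
#include "src/huffman_decode.h"

// Illustrative sketch: symbol 7 takes code "0" and the pseudo-EOF (128) takes
// code "1", so the payload bits "01" pack MSB-first into 0b01000000 == 0x40.
int Demo() {
  std::string blob = std::string("7 128#0 1#") + static_cast<char>(0x40);
  char out[1] = {0};
  mindspore::lite::huffman_decode decoder;
  return decoder.DoHuffmanDecode(blob, out);  // on success, out[0] == 7
}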

@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_
#define MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_
#include <cstring>
#include <utility>
#include <string>
#include <vector>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
namespace mindspore {
namespace lite {
const int PSEUDO_EOF = 128;
struct HuffmanNode {
int key;
unsigned int freq;
std::string code;
HuffmanNode *left, *right, *parent;
};
using HuffmanNodePtr = HuffmanNode *;
class huffman_decode {
public:
huffman_decode() = default;
~huffman_decode();
STATUS DoHuffmanDecode(const std::string &input_str, void *decoded_data);
private:
std::vector<HuffmanNodePtr> huffman_nodes_;
STATUS RebuildHuffmanTree(std::string key, std::string code, const HuffmanNodePtr &root);
STATUS DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str);
std::vector<std::string> Str2Vec(std::string s) {
size_t i = 0;
std::vector<std::string> vec;
while (i < s.length()) {
size_t j = i;
while (j < s.length() && s[j] != ' ') {
j++;
}
if (j != i) {
vec.push_back(s.substr(i, j - i));
i = j + 1;
} else {
i = j + 1;  // skip the separator; `i = j` here would spin forever on consecutive spaces
}
}
return vec;
}
};
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_

@ -28,6 +28,7 @@
#include "src/kernel_registry.h"
#include "src/lite_model.h"
#include "src/dequant.h"
#include "src/huffman_decode.h"
#if SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
@ -74,6 +75,7 @@ void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lit
dst_tensor->AddQuantParam(quant_arg);
}
}
dst_tensor->SetEnableHuffmanCode(src_tensor->enableHuffmanCode());
auto quant_clusters = src_tensor->quantClusters();
if (quant_clusters != nullptr) {
std::vector<float> clusters;
@ -94,6 +96,13 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde
int org_size = dst_tensor->Size();
return (pack_size != org_size) && (data_type == kNumberTypeInt8 || data_type == kNumberTypeInt16);
};
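// a const int8 tensor whose stored size differs from its shape-derived size, with the
// schema flag set, holds huffman-coded bytes and must be decoded before use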
auto NeedHuffmanDecode = [&src_tensor, &dst_tensor]() -> bool {
auto data_type = src_tensor->dataType();
auto enable_huffman_code = src_tensor->enableHuffmanCode();
int pack_size = src_tensor->data()->size();
int org_size = dst_tensor->Size();
return (pack_size != org_size) && (data_type == kNumberTypeInt8) && enable_huffman_code;
};
auto src_category = TensorCategory(src_tensor);
if ((src_category == Tensor::Category::CONST_TENSOR || src_category == Tensor::Category::CONST_SCALAR) &&
src_tensor->data() != nullptr && src_tensor->data()->size() > 0) {
@ -107,6 +116,21 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde
return RET_ERROR;
}
} else {
if (NeedHuffmanDecode()) {
auto dst_data = dst_tensor->MutableData();
if (dst_data == nullptr) {
MS_LOG(ERROR) << "Data from tensor is nullptr";
return RET_NULL_PTR;
}
std::string encode_str(src_tensor->data()->begin(), src_tensor->data()->end());
auto huffman_decode = std::make_unique<lite::huffman_decode>();
auto ret = huffman_decode->DoHuffmanDecode(encode_str, dst_data);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoHuffmanDecode failed.";
return ret;
}
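// the tensor now owns a decoded copy of the weight data, so record it for cleanup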
copyed_tensor_idxes_.emplace_back(tensor_index);
}
if (WeightTensorNeedCopy(model, tensor_index)) {
auto dst_data = dst_tensor->MutableData();
if (dst_data == nullptr) {

@ -450,6 +450,10 @@ void PrimitiveC::set_quant_type(const schema::QuantType &quant_type) { this->qua
schema::QuantType PrimitiveC::quant_type() const { return quant_type_; }
bool PrimitiveC::IsEnableHuffmanCode() const { return enableHuffmanCode; }
void PrimitiveC::SetEnableHuffmanCode(bool enableHuffmanCode) { this->enableHuffmanCode = enableHuffmanCode; }
std::shared_ptr<PrimitiveC> GetReturnPrim() {
auto return_primitiveT = new (std::nothrow) schema::PrimitiveT;
if (return_primitiveT == nullptr) {

@ -123,6 +123,10 @@ class PrimitiveC : public mindspore::Primitive {
schema::QuantType quant_type() const;
bool IsEnableHuffmanCode() const;
void SetEnableHuffmanCode(bool enableHuffmanCode);
virtual int InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs);
bool infer_flag() const;
@ -154,6 +158,7 @@ class PrimitiveC : public mindspore::Primitive {
schema::QuantType quant_type_{schema::QuantType_QUANT_NONE};
bool infer_flag_ = true;
int op_type_ = OP_TYPE_NOT_SET;
bool enableHuffmanCode = false;
};
std::shared_ptr<PrimitiveC> GetReturnPrim();

@ -367,6 +367,10 @@ std::vector<float> Tensor::quant_clusters() const { return this->quant_clusters_
void Tensor::set_quant_clusters(const std::vector<float> &clusters) { this->quant_clusters_ = clusters; }
bool Tensor::IsEnableHuffmanCode() const { return enableHuffmanCode; }
void Tensor::SetEnableHuffmanCode(bool enableHuffmanCode) { this->enableHuffmanCode = enableHuffmanCode; }
std::vector<tensor::MSTensor *> TensorVectorCast(const std::vector<Tensor *> &src) {
std::vector<tensor::MSTensor *> target(src.size());
std::transform(src.begin(), src.end(), target.begin(), [](Tensor *t) { return dynamic_cast<tensor::MSTensor *>(t); });

@ -149,6 +149,10 @@ class Tensor : public mindspore::tensor::MSTensor {
void set_quant_clusters(const std::vector<float> &clusters);
bool IsEnableHuffmanCode() const;
void SetEnableHuffmanCode(bool enableHuffmanCode);
virtual bool IsConst() const {
return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr;
}
@ -198,6 +202,7 @@ class Tensor : public mindspore::tensor::MSTensor {
std::vector<float> quant_clusters_;
mindspore::lite::Allocator *allocator_ = nullptr;
Tensor *root_tensor_ = nullptr;
bool enableHuffmanCode = false;
};
inline size_t DataTypeSize(const TypeId type) {

@ -132,6 +132,7 @@ set(TEST_LITE_SRC
${LITE_DIR}/src/lite_kernel.cc
${LITE_DIR}/src/lite_session.cc
${LITE_DIR}/src/dequant.cc
${LITE_DIR}/src/huffman_decode.cc
${LITE_DIR}/src/sub_graph_kernel.cc
${LITE_DIR}/src/lite_model.cc
${LITE_DIR}/src/scheduler.cc

@ -1 +1,2 @@
ml_face_openclose.tflite
ml_face_openclose.tflite 0.5
hiai_ghostnet.tflite 5

@ -221,13 +221,14 @@ function Run_Converter() {
# Convert tflite weightquant models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
echo ${model_name} >> "${run_converter_log_file}"
echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'--quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}"
./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16
echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0 --enableHuffmanCode=true' >> "${run_converter_log_file}"
./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0 --enableHuffmanCode=true
if [ $? = 0 ]; then
converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
else
@ -515,15 +516,17 @@ function Run_x86() {
# Run tflite weight quantization converted models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
echo ${model_name} >> "${run_x86_log_file}"
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64' >> "${run_x86_log_file}"
cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64 || return 1
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}"
if [ $? = 0 ]; then
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
@ -781,15 +784,17 @@ function Run_x86_sse() {
# Run tflite weight quantization converted models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
echo ${model_name} >> "${run_x86_sse_log_file}"
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-sse' >> "${run_x86_sse_log_file}"
cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-sse || return 1
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_sse_log_file}"
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_sse_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}"
if [ $? = 0 ]; then
run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
@ -1047,15 +1052,17 @@ function Run_x86_avx() {
# Run tflite weight quantization converted models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
echo ${model_name} >> "${run_x86_avx_log_file}"
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-avx' >> "${run_x86_avx_log_file}"
cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-avx || return 1
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_avx_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_avx_log_file}"
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_avx_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_avx_log_file}"
if [ $? = 0 ]; then
run_result='x86_avx: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else

File diff suppressed because it is too large

@ -45,10 +45,27 @@ class AnfExporter {
protected:
int ConvertInputCNode(const std::shared_ptr<AnfNode> &input_anode, schema::CNodeT *output_cnode);
int ConvertInputParameter(const std::shared_ptr<AnfNode> &input_anode,
int ConvertInputParameter(const std::shared_ptr<AnfNode> &input_anode, const std::shared_ptr<PrimitiveC> &primitive,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT, schema::CNodeT *output_cnode);
int ConvertInputValueNode(const std::shared_ptr<AnfNode> &input_anode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT, schema::CNodeT *output_cnode);
int ProcessTensor(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int ProcessInt32OrInt64Imm(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
void ProcessBoolImm(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
void ProcessInt(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
schema::CNodeT *output_cnode, const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int ProcessValueSequence(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int ProcessParamValueLite(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int SetGraphInputIndex(const std::unique_ptr<schema::MetaGraphT> &meta_graphT, const size_t &subgraph_index);
int SetGraphoutputIndex(const CNodePtr &cnode, const size_t subgraph_index,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
@ -58,6 +75,9 @@ class AnfExporter {
static int ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &meta_graph,
const std::shared_ptr<PrimitiveC> &primitive,
const std::unique_ptr<schema::CNodeT> &dst_node);
int Anf2Fb(const FuncGraphPtr &func_graph, const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
const size_t &subgraph_index, const bool &keep_graph, const bool &copy_primitive,
const std::unique_ptr<schema::SubGraphT> &sub_graphT);
int ExportSubgraph(const FuncGraphPtr &func_graph, const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
const size_t &subgraph_index, bool keep_graph, bool copy_primitive,
const std::shared_ptr<AnfNode> &partial_anode = nullptr);

@ -100,6 +100,7 @@ set(LITE_SRC
${SRC_DIR}/lite_model.cc
${SRC_DIR}/errorcode.cc
${SRC_DIR}/dequant.cc
${SRC_DIR}/huffman_decode.cc
)
if(SUPPORT_TRAIN)
set(LITE_SRC

@ -51,6 +51,7 @@
#include "tools/optimizer/graph/functionalize_control_op_pass.h"
#include "tools/converter/quantizer/post_training_quantizer.h"
#include "tools/converter/quantizer/quant_cast.h"
#include "tools/converter/quantizer/huffman_encode.h"
#include "tools/converter/quantizer/weight_quantizer.h"
using std::string;
@ -252,6 +253,19 @@ int AnfTransform::DoQuantize(const FuncGraphPtr &old_graph, const converter::Fla
return RET_OK;
}
int AnfTransform::DoHuffmanEncode(const converter::Flags *config, const FuncGraphPtr &new_graph) {
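// gate: only 8-bit weight-quant models, and only when --enableHuffmanCode=true was passed to the converter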
if (config->quantType == schema::QuantType_WeightQuant && config->bitNum == "8" && config->enableHuffmanCode) {
auto huffman_encode = std::make_unique<lite::huffman_encode>();
auto status = huffman_encode->DoHuffmanEncode(new_graph);
if (status != RET_OK) {
MS_LOG(ERROR) << "Huffman encode failed.";
ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
return RET_ERROR;
}
}
return RET_OK;
}
FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_graph, const converter::Flags *config) {
MS_ASSERT(nullptr != old_graph);
if (config == nullptr) {
@ -305,6 +319,13 @@ FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_grap
MS_LOG(ERROR) << "Do Quantize failed.";
return nullptr;
}
status = DoHuffmanEncode(config, new_graph);
if (status != RET_OK) {
MS_LOG(ERROR) << "Do HuffmanCode failed.";
return nullptr;
}
return new_graph;
}

@ -58,6 +58,8 @@ class AnfTransform {
int RunTFAdjustPass(const FuncGraphPtr &old_graph, const converter::Flags *config);
int DoQuantize(const FuncGraphPtr &old_graph, const converter::Flags *config, const FuncGraphPtr &new_graph);
int DoHuffmanEncode(const converter::Flags *config, const FuncGraphPtr &new_graph);
};
} // namespace lite
} // namespace mindspore

@ -42,53 +42,17 @@ Flags::Flags() {
AddFlag(&Flags::quantWeightSize, "quantWeightSize", "Weight quantization size threshold", "0");
AddFlag(&Flags::quantWeightChannel, "quantWeightChannel", "Channel threshold for weight quantization", "16");
AddFlag(&Flags::configFile, "configFile", "Configuration for post-training.", "");
AddFlag(&Flags::enableHuffmanCodeIn, "enableHuffmanCode",
"whether the weight quant model is going to use huffman code."
"true | false",
"false");
AddFlag(&Flags::trainModelIn, "trainModel",
"whether the model is going to be trained on device."
"true | false",
"false");
}
int Flags::Init(int argc, const char **argv) {
if (argc == 1) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
Option<std::string> err = this->ParseFlags(argc, argv);
if (err.IsSome()) {
std::cerr << err.Get();
std::cerr << this->Usage() << std::endl;
return RET_INPUT_PARAM_INVALID;
}
if (this->help) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
if (this->modelFile.empty()) {
std::cerr << "INPUT MISSING: model file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
if (this->outputFile.empty()) {
std::cerr << "INPUT MISSING: output file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
#ifdef _WIN32
replace(this->outputFile.begin(), this->outputFile.end(), '/', '\\');
#endif
if (this->outputFile.rfind('/') == this->outputFile.length() - 1 ||
this->outputFile.rfind('\\') == this->outputFile.length() - 1) {
std::cerr << "INPUT ILLEGAL: outputFile must be a valid file path";
return RET_INPUT_PARAM_INVALID;
}
if (this->fmkIn.empty()) {
std::cerr << "INPUT MISSING: fmk is necessary";
return RET_INPUT_PARAM_INVALID;
}
int Flags::InitInputOutputDataType() {
if (this->inputDataTypeIn == "FLOAT") {
this->inputDataType = TypeId::kNumberTypeFloat32;
} else if (this->inputDataTypeIn == "INT8") {
@ -117,7 +81,10 @@ int Flags::Init(int argc, const char **argv) {
this->outputDataTypeIn.c_str();
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitFmk() {
if (this->fmkIn == "CAFFE") {
this->fmk = FmkType_CAFFE;
} else if (this->fmkIn == "MINDIR") {
@ -137,7 +104,10 @@ int Flags::Init(int argc, const char **argv) {
std::cerr << "INPUT ILLEGAL: weightFile is not a valid flag";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitQuantType() {
if (this->quantTypeIn == "WeightQuant") {
this->quantType = QuantType_WeightQuant;
} else if (this->quantTypeIn == "PostTraining") {
@ -148,7 +118,22 @@ int Flags::Init(int argc, const char **argv) {
std::cerr << "INPUT ILLEGAL: quantType must be WeightQuant|PostTraining";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitHuffmanCode() {
if (this->enableHuffmanCodeIn == "true") {
this->enableHuffmanCode = true;
} else if (this->enableHuffmanCodeIn == "false") {
this->enableHuffmanCode = false;
} else {
std::cerr << "INPUT ILLEGAL: trainModel must be true|false ";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitTrainModel() {
if (this->trainModelIn == "true") {
this->trainModel = true;
} else if (this->trainModelIn == "false") {
@ -160,24 +145,99 @@ int Flags::Init(int argc, const char **argv) {
if (this->trainModel) {
if (this->fmk != FmkType_MS) {
std::cerr << "INPUT ILLEGAL: train model convertor supporting only MINDIR format";
std::cerr << "INPUT ILLEGAL: train model converter supporting only MINDIR format";
return RET_INPUT_PARAM_INVALID;
}
if ((this->inputDataType != TypeId::kNumberTypeFloat32) && (this->inputDataType != TypeId::kTypeUnknown)) {
std::cerr << "INPUT ILLEGAL: train model convertor supporting only FP32 input tensors";
std::cerr << "INPUT ILLEGAL: train model converter supporting only FP32 input tensors";
return RET_INPUT_PARAM_INVALID;
}
if ((this->outputDataType != TypeId::kNumberTypeFloat32) && (this->outputDataType != TypeId::kTypeUnknown)) {
std::cerr << "INPUT ILLEGAL: train model convertor supporting only FP32 output tensors";
std::cerr << "INPUT ILLEGAL: train model converter supporting only FP32 output tensors";
return RET_INPUT_PARAM_INVALID;
}
if (this->quantType != QuantType_QUANT_NONE) {
std::cerr << "INPUT ILLEGAL: train model convertor is not supporting quantization";
std::cerr << "INPUT ILLEGAL: train model converter is not supporting quantization";
return RET_INPUT_PARAM_INVALID;
}
}
return RET_OK;
}
int Flags::Init(int argc, const char **argv) {
int ret;
if (argc == 1) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
Option<std::string> err = this->ParseFlags(argc, argv);
if (err.IsSome()) {
std::cerr << err.Get();
std::cerr << this->Usage() << std::endl;
return RET_INPUT_PARAM_INVALID;
}
if (this->help) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
if (this->modelFile.empty()) {
std::cerr << "INPUT MISSING: model file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
if (this->outputFile.empty()) {
std::cerr << "INPUT MISSING: output file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
#ifdef _WIN32
replace(this->outputFile.begin(), this->outputFile.end(), '/', '\\');
#endif
if (this->outputFile.rfind('/') == this->outputFile.length() - 1 ||
this->outputFile.rfind('\\') == this->outputFile.length() - 1) {
std::cerr << "INPUT ILLEGAL: outputFile must be a valid file path";
return RET_INPUT_PARAM_INVALID;
}
if (this->fmkIn.empty()) {
std::cerr << "INPUT MISSING: fmk is necessary";
return RET_INPUT_PARAM_INVALID;
}
ret = InitInputOutputDataType();
if (ret != RET_OK) {
std::cerr << "Init input output datatype failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitFmk();
if (ret != RET_OK) {
std::cerr << "Init fmk failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitQuantType();
if (ret != RET_OK) {
std::cerr << "Init quant type failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitHuffmanCode();
if (ret != RET_OK) {
std::cerr << "Init huffman code failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitTrainModel();
if (ret != RET_OK) {
std::cerr << "Init train model failed.";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
} // namespace converter
} // namespace lite
} // namespace mindspore

@ -45,6 +45,16 @@ class Flags : public virtual mindspore::lite::FlagParser {
~Flags() override = default;
int InitInputOutputDataType();
int InitFmk();
int InitQuantType();
int InitHuffmanCode();
int InitTrainModel();
int Init(int argc, const char **argv);
public:
@ -70,6 +80,8 @@ class Flags : public virtual mindspore::lite::FlagParser {
std::string bitNum;
std::string configFile;
std::string quantWeightChannel;
std::string enableHuffmanCodeIn;
bool enableHuffmanCode = false;
std::string trainModelIn;
bool trainModel = false;
};

@ -10,6 +10,7 @@ file(GLOB QUANTIZER
${CMAKE_CURRENT_SOURCE_DIR}/post_training_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quant_cast.cc
${CMAKE_CURRENT_SOURCE_DIR}/weight_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/huffman_encode.cc
)
set_property(SOURCE ${QUANTIZER} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_LITE)
add_library(quantizer_mid OBJECT ${QUANTIZER})

File diff suppressed because it is too large

@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_HUFFMANCODE_HUFFMAN_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_HUFFMANCODE_HUFFMAN_H
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <queue>
#include <map>
#include <fstream>
#include "src/common/log_adapter.h"
#include "src/ops/primitive_c.h"
#include "ir/func_graph.h"
namespace mindspore {
namespace lite {
using STATUS = int;
const int PSEUDO_EOF = 128;
struct HuffmanNode {
int key;
unsigned int freq;
std::string code;
HuffmanNode *left, *right, *parent;
};
using HuffmanNodePtr = HuffmanNode *;
struct cmp {
public:
bool operator()(const HuffmanNodePtr &c1, const HuffmanNodePtr &c2) const { return c1->freq > c2->freq; }
};
using HuffmanPriorityQueue = std::priority_queue<HuffmanNodePtr, std::vector<HuffmanNodePtr>, cmp>;
class huffman_encode {
public:
huffman_encode() = default;
~huffman_encode();
STATUS DoHuffmanEncode(const FuncGraphPtr &func_graph);
private:
std::map<int, std::string> huffman_table_;
std::string huffman_encoded_str_ = "";
std::vector<HuffmanNodePtr> huffman_nodes_;
STATUS GetHuffmanPriorityQueue(const int8_t *input_datas, size_t input_data_size, HuffmanPriorityQueue *pq);
void GenerateHuffmanTable(HuffmanNodePtr node, bool is_left_node);
STATUS BuildHuffmanTree(HuffmanPriorityQueue *pq);
STATUS DoHuffmanCompress(const int8_t *input_datas, size_t data_size);
};
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_HUFFMANCODE_HUFFMAN_H
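Since the huffman_encode.cc diff is suppressed above as too large, the following is a hedged sketch of the pipeline this header implies: count symbol frequencies, push leaf nodes into a min-heap keyed on frequency, merge into a tree, walk the tree to generate the code table, then pack the bits MSB-first with the pseudo-EOF code appended so the runtime decoder knows where to stop. It emits the `<keys>#<codes>#<payload>` layout that huffman_decode parses; every name and detail below is an assumption-based reconstruction, not the committed implementation.

// Hedged sketch only: the real encoder in this patch is suppressed, so this
// helper reconstructs the pipeline the header declares under stated assumptions.
#include <cstdint>
#include <map>
#include <queue>
#include <string>
#include <vector>

namespace sketch {
constexpr int kPseudoEof = 128;  // matches PSEUDO_EOF in both headers

struct Node {
  int key = 0;
  unsigned int freq = 0;
  Node *left = nullptr;
  Node *right = nullptr;
};
struct Cmp {
  bool operator()(const Node *a, const Node *b) const { return a->freq > b->freq; }
};

// walk the tree, assigning "0" on left edges and "1" on right edges
void BuildTable(const Node *n, const std::string &prefix, std::map<int, std::string> *table) {
  if (n->left == nullptr && n->right == nullptr) {
    (*table)[n->key] = prefix.empty() ? "0" : prefix;
    return;
  }
  if (n->left != nullptr) BuildTable(n->left, prefix + "0", table);
  if (n->right != nullptr) BuildTable(n->right, prefix + "1", table);
}

std::string Encode(const int8_t *data, size_t size) {
  std::map<int, unsigned int> freqs;
  for (size_t i = 0; i < size; ++i) ++freqs[data[i]];
  ++freqs[kPseudoEof];  // guarantees the decoder a stop symbol
  std::priority_queue<Node *, std::vector<Node *>, Cmp> pq;
  std::vector<Node *> owned;
  for (const auto &kv : freqs) {
    owned.push_back(new Node{kv.first, kv.second});
    pq.push(owned.back());
  }
  while (pq.size() > 1) {  // classic huffman merge of the two rarest nodes
    Node *l = pq.top();
    pq.pop();
    Node *r = pq.top();
    pq.pop();
    owned.push_back(new Node{0, l->freq + r->freq, l, r});
    pq.push(owned.back());
  }
  std::map<int, std::string> table;
  BuildTable(pq.top(), "", &table);
  std::string keys, codes, bits;
  for (const auto &kv : table) {  // space-separated, as Str2Vec expects
    keys += std::to_string(kv.first) + " ";
    codes += kv.second + " ";
  }
  if (!keys.empty()) keys.pop_back();
  if (!codes.empty()) codes.pop_back();
  for (size_t i = 0; i < size; ++i) bits += table[data[i]];
  bits += table[kPseudoEof];
  std::string payload;
  for (size_t i = 0; i < bits.size(); i += 8) {  // pack MSB-first, the order the decoder reads
    unsigned char byte = 0;
    for (size_t j = 0; j < 8 && i + j < bits.size(); ++j) {
      if (bits[i + j] == '1') byte |= 0x80 >> j;
    }
    payload += static_cast<char>(byte);
  }
  for (auto *n : owned) delete n;
  return keys + "#" + codes + "#" + payload;
}
}  // namespace sketch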