!11510 [MS][LITE] weight quant support huffman code

From: @jianghui58
Reviewed-by: 
Signed-off-by:
pull/11510/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 86dfebd837

@ -57,6 +57,7 @@ table Tensor {
quantParams: [QuantParam];
quantClusters: [float];
name: string;
enableHuffmanCode: bool = false;
}
union PrimitiveType {

@ -37,6 +37,7 @@ set(LITE_SRC
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
${CMAKE_CURRENT_SOURCE_DIR}/dequant.cc
${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc
)
if(SUPPORT_GPU)

@ -0,0 +1,168 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/huffman_decode.h"
namespace mindspore {
namespace lite {
STATUS huffman_decode::DoHuffmanDecode(const std::string &input_str, void *decoded_data) {
if (decoded_data == nullptr) {
MS_LOG(ERROR) << "decoded_data is nullptr.";
return RET_ERROR;
}
int status;
std::string huffman_decoded_str = "";
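// expected layout of input_str: "<keys>#<codes>#<payload>" - two '#'-separated tables, then the packed bit stream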
auto key_pos = input_str.find_first_of('#');
auto code_pos = input_str.find_first_of('#', key_pos + 1);
if (key_pos == std::string::npos || code_pos == std::string::npos) {
MS_LOG(ERROR) << "input_str is not a valid huffman-coded blob.";
return RET_ERROR;
}
auto key = input_str.substr(0, key_pos);
auto code = input_str.substr(key_pos + 1, code_pos - key_pos - 1);
auto encoded_data = input_str.substr(code_pos + 1);
auto root = new (std::nothrow) HuffmanNode();
if (root == nullptr) {
MS_LOG(ERROR) << "new HuffmanNode failed.";
return RET_MEMORY_FAILED;
}
root->left = nullptr;
root->right = nullptr;
root->parent = nullptr;
status = RebuildHuffmanTree(key, code, root);
if (status != RET_OK) {
MS_LOG(ERROR) << "Rebuild huffman tree failed.";
delete root;
return status;
}
status = DoHuffmanDecompress(root, encoded_data, &huffman_decoded_str);
if (status != RET_OK) {
MS_LOG(ERROR) << "DoHuffmanDecompress failed.";
delete root;
return status;
}
size_t len = huffman_decoded_str.length();
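// NOTE: the caller must guarantee that decoded_data has room for at least len bytes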
memcpy(decoded_data, huffman_decoded_str.c_str(), len);
delete root;
return RET_OK;
}
STATUS huffman_decode::RebuildHuffmanTree(std::string keys, std::string codes, const HuffmanNodePtr &root) {
HuffmanNodePtr cur_node, tmp_node, new_node;
auto huffman_keys = Str2Vec(std::move(keys));
auto huffman_codes = Str2Vec(std::move(codes));
for (size_t i = 0; i < huffman_codes.size(); ++i) {
auto key = stoi(huffman_keys[i]);
auto code = huffman_codes[i];
auto code_len = code.length();
cur_node = root;
for (size_t j = 0; j < code_len; ++j) {
if (code[j] == '0') {
tmp_node = cur_node->left;
} else if (code[j] == '1') {
tmp_node = cur_node->right;
} else {
MS_LOG(ERROR) << "find huffman code is not 0 or 1";
return RET_ERROR;
}
if (tmp_node == nullptr) {
new_node = new (std::nothrow) HuffmanNode();
if (new_node == nullptr) {
MS_LOG(ERROR) << "new HuffmanNode failed.";
return RET_MEMORY_FAILED;
}
this->huffman_nodes_.push_back(new_node);
new_node->left = nullptr;
new_node->right = nullptr;
new_node->parent = cur_node;
if (j == code_len - 1) {
new_node->key = key;
new_node->code = code;
}
if (code[j] == '0') {
cur_node->left = new_node;
} else {
cur_node->right = new_node;
}
tmp_node = new_node;
} else if (j == code_len - 1) {
MS_LOG(ERROR) << "the huffman code is incomplete.";
return RET_ERROR;
} else if (tmp_node->left == nullptr && tmp_node->right == nullptr) {
MS_LOG(ERROR) << "the huffman code is incomplete";
return RET_ERROR;
}
cur_node = tmp_node;
}
}
return RET_OK;
}
STATUS huffman_decode::DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str) {
HuffmanNodePtr cur_node = root;
bool pseudo_eof = false;
size_t pos = 0;
unsigned char flag;
decoded_str->clear();
while (pos < encoded_data.length()) {
auto u_char = static_cast<unsigned char>(encoded_data[pos]);
flag = 0x80;
for (size_t i = 0; i < 8; ++i) { // traverse the 8 bit num, to find the leaf node
if (u_char & flag) {
cur_node = cur_node->right;
} else {
cur_node = cur_node->left;
}
if (cur_node == nullptr) {
MS_LOG(ERROR) << "the encoded bit stream does not match the huffman tree.";
return RET_ERROR;
}
if (cur_node->left == nullptr && cur_node->right == nullptr) {
auto key = cur_node->key;
if (key == PSEUDO_EOF) {
pseudo_eof = true;
break;
} else {
*decoded_str += static_cast<char>(cur_node->key);
cur_node = root;
}
}
flag = flag >> 1;
}
pos++;
if (pseudo_eof) {
break;
}
}
return RET_OK;
}
huffman_decode::~huffman_decode() {
for (auto &node : this->huffman_nodes_) {
delete node;
}
this->huffman_nodes_.clear();
}
} // namespace lite
} // namespace mindspore
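For orientation, DoHuffmanDecode above consumes a blob of the form `<keys>#<codes>#<payload>`: two space-separated tables followed by the code bits packed MSB-first and terminated by the pseudo-EOF symbol (key 128). A minimal hand-built round trip; the blob values and the Demo helper are illustrative sketches, not part of this patch:

#include <string>
#include "src/huffman_decode.h"

// Illustrative sketch: symbol 7 takes code "0" and the pseudo-EOF (128) takes
// code "1", so the payload bits "01" pack MSB-first into 0b01000000 == 0x40.
int Demo() {
  std::string blob = std::string("7 128#0 1#") + static_cast<char>(0x40);
  char out[1] = {0};
  mindspore::lite::huffman_decode decoder;
  return decoder.DoHuffmanDecode(blob, out);  // on success, out[0] == 7
}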

@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_
#define MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_
#include <cstring>
#include <utility>
#include <string>
#include <vector>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
namespace mindspore {
namespace lite {
const int PSEUDO_EOF = 128;
struct HuffmanNode {
int key;
unsigned int freq;
std::string code;
HuffmanNode *left, *right, *parent;
};
using HuffmanNodePtr = HuffmanNode *;
class huffman_decode {
public:
huffman_decode() = default;
~huffman_decode();
STATUS DoHuffmanDecode(const std::string &input_str, void *decoded_data);
private:
std::vector<HuffmanNodePtr> huffman_nodes_;
STATUS RebuildHuffmanTree(std::string key, std::string code, const HuffmanNodePtr &root);
STATUS DoHuffmanDecompress(HuffmanNodePtr root, std::string encoded_data, std::string *decoded_str);
std::vector<std::string> Str2Vec(std::string s) {
size_t i = 0;
std::vector<std::string> vec;
while (i < s.length()) {
size_t j = i;
while (j < s.length() && s[j] != ' ') {
j++;
}
if (j != i) {
vec.push_back(s.substr(i, j - i));
i = j + 1;
} else {
i = j + 1;  // skip the separator; `i = j` here would spin forever on consecutive spaces
}
}
return vec;
}
};
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_

@ -28,6 +28,7 @@
#include "src/kernel_registry.h"
#include "src/lite_model.h"
#include "src/dequant.h"
#include "src/huffman_decode.h"
#if SUPPORT_NPU
#include "src/runtime/agent/npu/npu_manager.h"
#include "src/runtime/agent/npu/optimizer/npu_pass_manager.h"
@ -74,6 +75,7 @@ void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lit
dst_tensor->AddQuantParam(quant_arg);
}
}
dst_tensor->SetEnableHuffmanCode(src_tensor->enableHuffmanCode());
auto quant_clusters = src_tensor->quantClusters();
if (quant_clusters != nullptr) {
std::vector<float> clusters;
@ -94,6 +96,13 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde
int org_size = dst_tensor->Size();
return (pack_size != org_size) && (data_type == kNumberTypeInt8 || data_type == kNumberTypeInt16);
};
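// a const int8 tensor whose stored size differs from its shape-derived size, with the
// schema flag set, holds huffman-coded bytes and must be decoded before use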
auto NeedHuffmanDecode = [&src_tensor, &dst_tensor]() -> bool {
auto data_type = src_tensor->dataType();
auto enable_huffman_code = src_tensor->enableHuffmanCode();
int pack_size = src_tensor->data()->size();
int org_size = dst_tensor->Size();
return (pack_size != org_size) && (data_type == kNumberTypeInt8) && enable_huffman_code;
};
auto src_category = TensorCategory(src_tensor);
if ((src_category == Tensor::Category::CONST_TENSOR || src_category == Tensor::Category::CONST_SCALAR) &&
src_tensor->data() != nullptr && src_tensor->data()->size() > 0) {
@ -107,6 +116,21 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde
return RET_ERROR;
}
} else {
if (NeedHuffmanDecode()) {
auto dst_data = dst_tensor->MutableData();
if (dst_data == nullptr) {
MS_LOG(ERROR) << "Data from tensor is nullptr";
return RET_NULL_PTR;
}
std::string encode_str(src_tensor->data()->begin(), src_tensor->data()->end());
auto huffman_decode = std::make_unique<lite::huffman_decode>();
auto ret = huffman_decode->DoHuffmanDecode(encode_str, dst_data);
if (ret != RET_OK) {
MS_LOG(ERROR) << "DoHuffmanDecode failed.";
return ret;
}
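// the tensor now owns a decoded copy of the weight data, so record it for cleanup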
copyed_tensor_idxes_.emplace_back(tensor_index);
}
if (WeightTensorNeedCopy(model, tensor_index)) {
auto dst_data = dst_tensor->MutableData();
if (dst_data == nullptr) {

@ -450,6 +450,10 @@ void PrimitiveC::set_quant_type(const schema::QuantType &quant_type) { this->qua
schema::QuantType PrimitiveC::quant_type() const { return quant_type_; }
bool PrimitiveC::IsEnableHuffmanCode() const { return enableHuffmanCode; }
void PrimitiveC::SetEnableHuffmanCode(bool enableHuffmanCode) { this->enableHuffmanCode = enableHuffmanCode; }
std::shared_ptr<PrimitiveC> GetReturnPrim() {
auto return_primitiveT = new (std::nothrow) schema::PrimitiveT;
if (return_primitiveT == nullptr) {

@ -123,6 +123,10 @@ class PrimitiveC : public mindspore::Primitive {
schema::QuantType quant_type() const;
bool IsEnableHuffmanCode() const;
void SetEnableHuffmanCode(bool enableHuffmanCode);
virtual int InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs);
bool infer_flag() const;
@ -154,6 +158,7 @@ class PrimitiveC : public mindspore::Primitive {
schema::QuantType quant_type_{schema::QuantType_QUANT_NONE};
bool infer_flag_ = true;
int op_type_ = OP_TYPE_NOT_SET;
bool enableHuffmanCode = false;
};
std::shared_ptr<PrimitiveC> GetReturnPrim();

@ -367,6 +367,10 @@ std::vector<float> Tensor::quant_clusters() const { return this->quant_clusters_
void Tensor::set_quant_clusters(const std::vector<float> &clusters) { this->quant_clusters_ = clusters; }
bool Tensor::IsEnableHuffmanCode() const { return enableHuffmanCode; }
void Tensor::SetEnableHuffmanCode(bool enableHuffmanCode) { this->enableHuffmanCode = enableHuffmanCode; }
std::vector<tensor::MSTensor *> TensorVectorCast(const std::vector<Tensor *> &src) {
std::vector<tensor::MSTensor *> target(src.size());
std::transform(src.begin(), src.end(), target.begin(), [](Tensor *t) { return dynamic_cast<tensor::MSTensor *>(t); });

@ -149,6 +149,10 @@ class Tensor : public mindspore::tensor::MSTensor {
void set_quant_clusters(const std::vector<float> &clusters);
bool IsEnableHuffmanCode() const;
void SetEnableHuffmanCode(bool enableHuffmanCode);
virtual bool IsConst() const {
return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr;
}
@ -198,6 +202,7 @@ class Tensor : public mindspore::tensor::MSTensor {
std::vector<float> quant_clusters_;
mindspore::lite::Allocator *allocator_ = nullptr;
Tensor *root_tensor_ = nullptr;
bool enableHuffmanCode = false;
};
inline size_t DataTypeSize(const TypeId type) {

@ -132,6 +132,7 @@ set(TEST_LITE_SRC
${LITE_DIR}/src/lite_kernel.cc
${LITE_DIR}/src/lite_session.cc
${LITE_DIR}/src/dequant.cc
${LITE_DIR}/src/huffman_decode.cc
${LITE_DIR}/src/sub_graph_kernel.cc
${LITE_DIR}/src/lite_model.cc
${LITE_DIR}/src/scheduler.cc

@ -1 +1,2 @@
ml_face_openclose.tflite
ml_face_openclose.tflite 0.5
hiai_ghostnet.tflite 5

@ -221,13 +221,14 @@ function Run_Converter() {
# Convert tflite weightquant models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
echo ${model_name} >> "${run_converter_log_file}"
echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'--quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}"
./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16
echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0 --enableHuffmanCode=true' >> "${run_converter_log_file}"
./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0 --enableHuffmanCode=true
if [ $? = 0 ]; then
converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
else
@ -515,15 +516,17 @@ function Run_x86() {
# Run tflite weight quantization converted models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
echo ${model_name} >> "${run_x86_log_file}"
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64' >> "${run_x86_log_file}"
cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64 || return 1
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_log_file}"
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}"
if [ $? = 0 ]; then
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
@ -781,15 +784,17 @@ function Run_x86_sse() {
# Run tflite weight quantization converted models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
echo ${model_name} >> "${run_x86_sse_log_file}"
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-sse' >> "${run_x86_sse_log_file}"
cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-sse || return 1
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_sse_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_sse_log_file}"
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_sse_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}"
if [ $? = 0 ]; then
run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else
@ -1047,15 +1052,17 @@ function Run_x86_avx() {
# Run tflite weight quantization converted models:
while read line; do
model_name=${line}
if [[ $model_name == \#* ]]; then
weight_quant_line_info=${line}
if [[ $weight_quant_line_info == \#* ]]; then
continue
fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
echo ${model_name} >> "${run_x86_avx_log_file}"
echo 'cd '${x86_path}'/mindspore-lite-'${version}'-inference-linux-x64-avx' >> "${run_x86_avx_log_file}"
cd ${x86_path}/mindspore-lite-${version}-inference-linux-x64-avx || return 1
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out' >> "${run_x86_avx_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out >> "${run_x86_avx_log_file}"
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_avx_log_file}"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./benchmark/benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_avx_log_file}"
if [ $? = 0 ]; then
run_result='x86_avx: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else

File diff suppressed because it is too large

@ -45,10 +45,27 @@ class AnfExporter {
protected:
int ConvertInputCNode(const std::shared_ptr<AnfNode> &input_anode, schema::CNodeT *output_cnode);
int ConvertInputParameter(const std::shared_ptr<AnfNode> &input_anode,
int ConvertInputParameter(const std::shared_ptr<AnfNode> &input_anode, const std::shared_ptr<PrimitiveC> &primitive,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT, schema::CNodeT *output_cnode);
int ConvertInputValueNode(const std::shared_ptr<AnfNode> &input_anode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT, schema::CNodeT *output_cnode);
int ProcessTensor(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int ProcessInt32OrInt64Imm(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
void ProcessBoolImm(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
void ProcessInt(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
schema::CNodeT *output_cnode, const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int ProcessValueSequence(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int ProcessParamValueLite(const ValueNodePtr &valueNode, std::unique_ptr<schema::TensorT> *paramTensor,
const std::shared_ptr<Value> &value, schema::CNodeT *output_cnode,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT);
int SetGraphInputIndex(const std::unique_ptr<schema::MetaGraphT> &meta_graphT, const size_t &subgraph_index);
int SetGraphoutputIndex(const CNodePtr &cnode, const size_t subgraph_index,
const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
@ -58,6 +75,9 @@ class AnfExporter {
static int ConvertQuantParam(const std::unique_ptr<schema::MetaGraphT> &meta_graph,
const std::shared_ptr<PrimitiveC> &primitive,
const std::unique_ptr<schema::CNodeT> &dst_node);
int Anf2Fb(const FuncGraphPtr &func_graph, const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
const size_t &subgraph_index, const bool &keep_graph, const bool &copy_primitive,
const std::unique_ptr<schema::SubGraphT> &sub_graphT);
int ExportSubgraph(const FuncGraphPtr &func_graph, const std::unique_ptr<schema::MetaGraphT> &meta_graphT,
const size_t &subgraph_index, bool keep_graph, bool copy_primitive,
const std::shared_ptr<AnfNode> &partial_anode = nullptr);

@ -100,6 +100,7 @@ set(LITE_SRC
${SRC_DIR}/lite_model.cc
${SRC_DIR}/errorcode.cc
${SRC_DIR}/dequant.cc
${SRC_DIR}/huffman_decode.cc
)
if(SUPPORT_TRAIN)
set(LITE_SRC

@ -51,6 +51,7 @@
#include "tools/optimizer/graph/functionalize_control_op_pass.h"
#include "tools/converter/quantizer/post_training_quantizer.h"
#include "tools/converter/quantizer/quant_cast.h"
#include "tools/converter/quantizer/huffman_encode.h"
#include "tools/converter/quantizer/weight_quantizer.h"
using std::string;
@ -252,6 +253,19 @@ int AnfTransform::DoQuantize(const FuncGraphPtr &old_graph, const converter::Fla
return RET_OK;
}
int AnfTransform::DoHuffmanEncode(const converter::Flags *config, const FuncGraphPtr &new_graph) {
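// gate: only 8-bit weight-quant models, and only when --enableHuffmanCode=true was passed to the converter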
if (config->quantType == schema::QuantType_WeightQuant && config->bitNum == "8" && config->enableHuffmanCode) {
auto huffman_encode = std::make_unique<lite::huffman_encode>();
auto status = huffman_encode->DoHuffmanEncode(new_graph);
if (status != RET_OK) {
MS_LOG(ERROR) << "Huffman encode failed.";
ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
return RET_ERROR;
}
}
return RET_OK;
}
FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_graph, const converter::Flags *config) {
MS_ASSERT(nullptr != old_graph);
if (config == nullptr) {
@ -305,6 +319,13 @@ FuncGraphPtr AnfTransform::TransformSingleFuncGraph(const FuncGraphPtr &old_grap
MS_LOG(ERROR) << "Do Quantize failed.";
return nullptr;
}
status = DoHuffmanEncode(config, new_graph);
if (status != RET_OK) {
MS_LOG(ERROR) << "Do HuffmanCode failed.";
return nullptr;
}
return new_graph;
}

@ -58,6 +58,8 @@ class AnfTransform {
int RunTFAdjustPass(const FuncGraphPtr &old_graph, const converter::Flags *config);
int DoQuantize(const FuncGraphPtr &old_graph, const converter::Flags *config, const FuncGraphPtr &new_graph);
int DoHuffmanEncode(const converter::Flags *config, const FuncGraphPtr &new_graph);
};
} // namespace lite
} // namespace mindspore

@ -42,53 +42,17 @@ Flags::Flags() {
AddFlag(&Flags::quantWeightSize, "quantWeightSize", "Weight quantization size threshold", "0");
AddFlag(&Flags::quantWeightChannel, "quantWeightChannel", "Channel threshold for weight quantization", "16");
AddFlag(&Flags::configFile, "configFile", "Configuration for post-training.", "");
AddFlag(&Flags::enableHuffmanCodeIn, "enableHuffmanCode",
"whether the weight quant model is going to use huffman code."
"true | false",
"false");
AddFlag(&Flags::trainModelIn, "trainModel",
"whether the model is going to be trained on device."
"true | false",
"false");
}
int Flags::Init(int argc, const char **argv) {
if (argc == 1) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
Option<std::string> err = this->ParseFlags(argc, argv);
if (err.IsSome()) {
std::cerr << err.Get();
std::cerr << this->Usage() << std::endl;
return RET_INPUT_PARAM_INVALID;
}
if (this->help) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
if (this->modelFile.empty()) {
std::cerr << "INPUT MISSING: model file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
if (this->outputFile.empty()) {
std::cerr << "INPUT MISSING: output file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
#ifdef _WIN32
replace(this->outputFile.begin(), this->outputFile.end(), '/', '\\');
#endif
if (this->outputFile.rfind('/') == this->outputFile.length() - 1 ||
this->outputFile.rfind('\\') == this->outputFile.length() - 1) {
std::cerr << "INPUT ILLEGAL: outputFile must be a valid file path";
return RET_INPUT_PARAM_INVALID;
}
if (this->fmkIn.empty()) {
std::cerr << "INPUT MISSING: fmk is necessary";
return RET_INPUT_PARAM_INVALID;
}
int Flags::InitInputOutputDataType() {
if (this->inputDataTypeIn == "FLOAT") {
this->inputDataType = TypeId::kNumberTypeFloat32;
} else if (this->inputDataTypeIn == "INT8") {
@ -117,7 +81,10 @@ int Flags::Init(int argc, const char **argv) {
this->outputDataTypeIn.c_str();
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitFmk() {
if (this->fmkIn == "CAFFE") {
this->fmk = FmkType_CAFFE;
} else if (this->fmkIn == "MINDIR") {
@ -137,7 +104,10 @@ int Flags::Init(int argc, const char **argv) {
std::cerr << "INPUT ILLEGAL: weightFile is not a valid flag";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitQuantType() {
if (this->quantTypeIn == "WeightQuant") {
this->quantType = QuantType_WeightQuant;
} else if (this->quantTypeIn == "PostTraining") {
@ -148,7 +118,22 @@ int Flags::Init(int argc, const char **argv) {
std::cerr << "INPUT ILLEGAL: quantType must be WeightQuant|PostTraining";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitHuffmanCode() {
if (this->enableHuffmanCodeIn == "true") {
this->enableHuffmanCode = true;
} else if (this->enableHuffmanCodeIn == "false") {
this->enableHuffmanCode = false;
} else {
std::cerr << "INPUT ILLEGAL: trainModel must be true|false ";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
int Flags::InitTrainModel() {
if (this->trainModelIn == "true") {
this->trainModel = true;
} else if (this->trainModelIn == "false") {
@ -160,24 +145,99 @@ int Flags::Init(int argc, const char **argv) {
if (this->trainModel) {
if (this->fmk != FmkType_MS) {
std::cerr << "INPUT ILLEGAL: train model convertor supporting only MINDIR format";
std::cerr << "INPUT ILLEGAL: train model converter supporting only MINDIR format";
return RET_INPUT_PARAM_INVALID;
}
if ((this->inputDataType != TypeId::kNumberTypeFloat32) && (this->inputDataType != TypeId::kTypeUnknown)) {
std::cerr << "INPUT ILLEGAL: train model convertor supporting only FP32 input tensors";
std::cerr << "INPUT ILLEGAL: train model converter supporting only FP32 input tensors";
return RET_INPUT_PARAM_INVALID;
}
if ((this->outputDataType != TypeId::kNumberTypeFloat32) && (this->outputDataType != TypeId::kTypeUnknown)) {
std::cerr << "INPUT ILLEGAL: train model convertor supporting only FP32 output tensors";
std::cerr << "INPUT ILLEGAL: train model converter supporting only FP32 output tensors";
return RET_INPUT_PARAM_INVALID;
}
if (this->quantType != QuantType_QUANT_NONE) {
std::cerr << "INPUT ILLEGAL: train model convertor is not supporting quantization";
std::cerr << "INPUT ILLEGAL: train model converter is not supporting quantization";
return RET_INPUT_PARAM_INVALID;
}
}
return RET_OK;
}
int Flags::Init(int argc, const char **argv) {
int ret;
if (argc == 1) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
Option<std::string> err = this->ParseFlags(argc, argv);
if (err.IsSome()) {
std::cerr << err.Get();
std::cerr << this->Usage() << std::endl;
return RET_INPUT_PARAM_INVALID;
}
if (this->help) {
std::cout << this->Usage() << std::endl;
return RET_SUCCESS_EXIT;
}
if (this->modelFile.empty()) {
std::cerr << "INPUT MISSING: model file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
if (this->outputFile.empty()) {
std::cerr << "INPUT MISSING: output file path is necessary";
return RET_INPUT_PARAM_INVALID;
}
#ifdef _WIN32
replace(this->outputFile.begin(), this->outputFile.end(), '/', '\\');
#endif
if (this->outputFile.rfind('/') == this->outputFile.length() - 1 ||
this->outputFile.rfind('\\') == this->outputFile.length() - 1) {
std::cerr << "INPUT ILLEGAL: outputFile must be a valid file path";
return RET_INPUT_PARAM_INVALID;
}
if (this->fmkIn.empty()) {
std::cerr << "INPUT MISSING: fmk is necessary";
return RET_INPUT_PARAM_INVALID;
}
ret = InitInputOutputDataType();
if (ret != RET_OK) {
std::cerr << "Init input output datatype failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitFmk();
if (ret != RET_OK) {
std::cerr << "Init fmk failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitQuantType();
if (ret != RET_OK) {
std::cerr << "Init quant type failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitHuffmanCode();
if (ret != RET_OK) {
std::cerr << "Init huffman code failed.";
return RET_INPUT_PARAM_INVALID;
}
ret = InitTrainModel();
if (ret != RET_OK) {
std::cerr << "Init train model failed.";
return RET_INPUT_PARAM_INVALID;
}
return RET_OK;
}
} // namespace converter
} // namespace lite
} // namespace mindspore

@ -45,6 +45,16 @@ class Flags : public virtual mindspore::lite::FlagParser {
~Flags() override = default;
int InitInputOutputDataType();
int InitFmk();
int InitQuantType();
int InitHuffmanCode();
int InitTrainModel();
int Init(int argc, const char **argv);
public:
@ -70,6 +80,8 @@ class Flags : public virtual mindspore::lite::FlagParser {
std::string bitNum;
std::string configFile;
std::string quantWeightChannel;
std::string enableHuffmanCodeIn;
bool enableHuffmanCode = false;
std::string trainModelIn;
bool trainModel = false;
};

@ -10,6 +10,7 @@ file(GLOB QUANTIZER
${CMAKE_CURRENT_SOURCE_DIR}/post_training_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/quant_cast.cc
${CMAKE_CURRENT_SOURCE_DIR}/weight_quantizer.cc
${CMAKE_CURRENT_SOURCE_DIR}/huffman_encode.cc
)
set_property(SOURCE ${QUANTIZER} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_LITE)
add_library(quantizer_mid OBJECT ${QUANTIZER})

File diff suppressed because it is too large

@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_HUFFMANCODE_HUFFMAN_H
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_HUFFMANCODE_HUFFMAN_H
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
#include <queue>
#include <map>
#include <fstream>
#include "src/common/log_adapter.h"
#include "src/ops/primitive_c.h"
#include "ir/func_graph.h"
namespace mindspore {
namespace lite {
using STATUS = int;
const int PSEUDO_EOF = 128;
struct HuffmanNode {
int key;
unsigned int freq;
std::string code;
HuffmanNode *left, *right, *parent;
};
using HuffmanNodePtr = HuffmanNode *;
struct cmp {
public:
bool operator()(const HuffmanNodePtr &c1, const HuffmanNodePtr &c2) const { return c1->freq > c2->freq; }
};
using HuffmanPriorityQueue = std::priority_queue<HuffmanNodePtr, std::vector<HuffmanNodePtr>, cmp>;
class huffman_encode {
public:
huffman_encode() = default;
~huffman_encode();
STATUS DoHuffmanEncode(const FuncGraphPtr &func_graph);
private:
std::map<int, std::string> huffman_table_;
std::string huffman_encoded_str_ = "";
std::vector<HuffmanNodePtr> huffman_nodes_;
STATUS GetHuffmanPriorityQueue(const int8_t *input_datas, size_t input_data_size, HuffmanPriorityQueue *pq);
void GenerateHuffmanTable(HuffmanNodePtr node, bool is_left_node);
STATUS BuildHuffmanTree(HuffmanPriorityQueue *pq);
STATUS DoHuffmanCompress(const int8_t *input_datas, size_t data_size);
};
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_HUFFMANCODE_HUFFMAN_H
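Since the huffman_encode.cc diff is suppressed above as too large, the following is a hedged sketch of the pipeline this header implies: count symbol frequencies, push leaf nodes into a min-heap keyed on frequency, merge into a tree, walk the tree to generate the code table, then pack the bits MSB-first with the pseudo-EOF code appended so the runtime decoder knows where to stop. It emits the `<keys>#<codes>#<payload>` layout that huffman_decode parses; every name and detail below is an assumption-based reconstruction, not the committed implementation.

// Hedged sketch only: the real encoder in this patch is suppressed, so this
// helper reconstructs the pipeline the header declares under stated assumptions.
#include <cstdint>
#include <map>
#include <queue>
#include <string>
#include <vector>

namespace sketch {
constexpr int kPseudoEof = 128;  // matches PSEUDO_EOF in both headers

struct Node {
  int key = 0;
  unsigned int freq = 0;
  Node *left = nullptr;
  Node *right = nullptr;
};
struct Cmp {
  bool operator()(const Node *a, const Node *b) const { return a->freq > b->freq; }
};

// walk the tree, assigning "0" on left edges and "1" on right edges
void BuildTable(const Node *n, const std::string &prefix, std::map<int, std::string> *table) {
  if (n->left == nullptr && n->right == nullptr) {
    (*table)[n->key] = prefix.empty() ? "0" : prefix;
    return;
  }
  if (n->left != nullptr) BuildTable(n->left, prefix + "0", table);
  if (n->right != nullptr) BuildTable(n->right, prefix + "1", table);
}

std::string Encode(const int8_t *data, size_t size) {
  std::map<int, unsigned int> freqs;
  for (size_t i = 0; i < size; ++i) ++freqs[data[i]];
  ++freqs[kPseudoEof];  // guarantees the decoder a stop symbol
  std::priority_queue<Node *, std::vector<Node *>, Cmp> pq;
  std::vector<Node *> owned;
  for (const auto &kv : freqs) {
    owned.push_back(new Node{kv.first, kv.second});
    pq.push(owned.back());
  }
  while (pq.size() > 1) {  // classic huffman merge of the two rarest nodes
    Node *l = pq.top();
    pq.pop();
    Node *r = pq.top();
    pq.pop();
    owned.push_back(new Node{0, l->freq + r->freq, l, r});
    pq.push(owned.back());
  }
  std::map<int, std::string> table;
  BuildTable(pq.top(), "", &table);
  std::string keys, codes, bits;
  for (const auto &kv : table) {  // space-separated, as Str2Vec expects
    keys += std::to_string(kv.first) + " ";
    codes += kv.second + " ";
  }
  if (!keys.empty()) keys.pop_back();
  if (!codes.empty()) codes.pop_back();
  for (size_t i = 0; i < size; ++i) bits += table[data[i]];
  bits += table[kPseudoEof];
  std::string payload;
  for (size_t i = 0; i < bits.size(); i += 8) {  // pack MSB-first, the order the decoder reads
    unsigned char byte = 0;
    for (size_t j = 0; j < 8 && i + j < bits.size(); ++j) {
      if (bits[i + j] == '1') byte |= 0x80 >> j;
    }
    payload += static_cast<char>(byte);
  }
  for (auto *n : owned) delete n;
  return keys + "#" + codes + "#" + payload;
}
}  // namespace sketch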