diff --git a/mindspore/lite/schema/model.fbs b/mindspore/lite/schema/model.fbs
index e39fc36d06..df9889c96b 100644
--- a/mindspore/lite/schema/model.fbs
+++ b/mindspore/lite/schema/model.fbs
@@ -34,6 +34,7 @@ table QuantParam {
     inited: bool = false;
     var_corr: double = 1;
     mean_corr: double = 0;
+    clusters: [float];  // optional centroid table; the runtime treats a missing vector as "no clusters"
 }
 
 table Tensor {
diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc
index 53f8cce585..e2ce2ff400 100644
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -110,6 +110,12 @@ int LiteSession::ConvertTensors(const lite::Model *model) {
           quant_arg.zeroPoint = quant_params->Get(j)->zeroPoint();
           quant_arg.var_corr = quant_params->Get(j)->var_corr();
           quant_arg.mean_corr = quant_params->Get(j)->mean_corr();
+          auto quant_clusters = quant_params->Get(j)->clusters();
+          if (quant_clusters != nullptr) {
+            for (size_t k = 0; k < quant_clusters->size(); k++) {
+              quant_arg.clusters.emplace_back(quant_clusters->Get(k));
+            }
+          }
           dstTensor->AddQuantParam(quant_arg);
         }
       }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/dequant.h b/mindspore/lite/src/runtime/kernel/arm/base/dequant.h
index 84265ded59..934ee09cb5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/dequant.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/dequant.h
@@ -83,7 +83,18 @@ class DequantUtil {
       auto scale = param.scale;
       auto zero_point = param.zeroPoint;
       for (int64_t j = 0; j < input_tensor->ElementsNum(); j++) {
-        dequant_datas[j] = static_cast((quant_datas[j] - zero_point) * scale);
+        if (!param.clusters.empty()) {
+          // Cluster-coded data stores (centroid index + INT8_MIN); shift back to a 0-based index.
+          const int index = static_cast<int>(quant_datas[j]) - INT8_MIN;
+          if (index < 0 || static_cast<size_t>(index) >= param.clusters.size()) {
+            MS_LOG(ERROR) << "KMeans param quant is error.";
+            free(dequant_datas);
+            return nullptr;
+          }
+          dequant_datas[j] = static_cast<float>(param.clusters[index]);
+        } else {
+          dequant_datas[j] = static_cast<float>((quant_datas[j] - zero_point) * scale);
+        }
       }
     }
     return dequant_datas;
diff --git a/mindspore/lite/src/tensor.h
b/mindspore/lite/src/tensor.h
index 7f219de192..fa4ff6f817 100644
--- a/mindspore/lite/src/tensor.h
+++ b/mindspore/lite/src/tensor.h
@@ -35,6 +35,7 @@ struct QuantArg {
   int32_t zeroPoint;
   double var_corr{1};
   double mean_corr{0};
+  std::vector clusters{};  // centroid table; DequantUtil looks values up here when it is non-empty. NOTE(review): element type looks stripped, presumably float - confirm
 };
 
 class Tensor : public mindspore::tensor::MSTensor {
diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
index 3f5b68fbf1..557d4c1012 100644
--- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
@@ -101,7 +101,7 @@ STATUS DivergInfo::ComputeThreshold() {
   }
 
   if (method_x == kMethodOutlier) {
-    this->percent_result = PercentMethod(min_datas, max_datas);
+    this->percent_result = OutlierMethod(min_datas, max_datas);  // same call, PercentMethod is renamed to OutlierMethod by this patch
     this->best_T = std::max(std::fabs(percent_result.first), std::fabs(percent_result.second));
     return RET_OK;
   }
diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
index b54ddf0c68..24993e95c5 100644
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include  // NOTE(review): header name is missing here (looks stripped); presumably <set>, which InitClusters uses - confirm
 #include "src/ops/primitive_c.h"
 #include "mindspore/lite/tools/converter/quantizer/general_bitpacking.h"
 #include "src/common/utils.h"
@@ -305,8 +306,8 @@ STATUS PostBitPack(float *weight, size_t shapeSize, size_t bitNum) {
   return RET_OK;
 }
 
-bool SearchLowerBound(const std::vector &data, const size_t &index, const float &max_tmp, float *min_tmp,
-                      size_t *min_idx) {
+static bool SearchLowerBound(const std::vector &data, const size_t &index, const float &max_tmp, float *min_tmp,
+                             size_t *min_idx) {  // made file-local; its declaration is removed from quantize_util.h in this patch
   size_t length = data.size();
   if (max_tmp - data.at(index) < delta) {
     return false;
@@ -320,8 +321,8 @@ bool SearchLowerBound(const std::vector &data, const size_t
&index, const
   return true;
 }
 
-bool SearchUpperBound(const std::vector &data, const size_t &index, float *max_tmp, const float &min_tmp,
-                      size_t *max_idx) {
+static bool SearchUpperBound(const std::vector &data, const size_t &index, float *max_tmp, const float &min_tmp,
+                             size_t *max_idx) {
   size_t length = data.size();
   if (data.at(index) - min_tmp < delta) {
     return false;
@@ -335,7 +336,7 @@ bool SearchUpperBound(const std::vector &data, const size_t &index, float
   return true;
 }
 
-float CalPercentile(const std::vector &datas, const int &outlier_percent) {
+static float CalPercentile(const std::vector &datas, const int &outlier_percent) {
   const int size = datas.size();
   float val = outlier_percent / 100.0 * size;
   int index = std::ceil(val);
@@ -348,7 +349,7 @@ float CalPercentile(const std::vector &datas, const int &outlier_percent)
   return result;
 }
 
-std::pair PercentMethod(std::vector min_datas, std::vector max_datas) {
+std::pair OutlierMethod(std::vector min_datas, std::vector max_datas) {
   std::sort(max_datas.begin(), max_datas.end());
   std::sort(min_datas.begin(), min_datas.end());
   float min_val = CalPercentile(min_datas, percent);
@@ -372,6 +373,89 @@ std::pair PercentMethod(std::vector min_datas, std::vector<
   std::pair result{min_tmp, max_tmp};
   return result;
 }
+
+// Picks k initial centroids spread evenly over the sorted distinct values of data[0..elem_count).
+// Returns an empty vector when data is null, k is 0, or there are fewer than k distinct values.
+static std::vector<float> InitClusters(float *data, size_t elem_count, size_t k) {
+  std::vector<float> clusters{};
+  if (data == nullptr || k == 0) {
+    return clusters;
+  }
+  // std::set both de-duplicates and orders the values, so no separate sort is needed.
+  const std::set<float> set_unique(data, data + elem_count);
+  if (set_unique.size() < k) {
+    return clusters;
+  }
+  const std::vector<float> data_unique(set_unique.begin(), set_unique.end());
+  const size_t last = data_unique.size() - 1;
+  // k == 1 would divide by zero; a zero step then selects the smallest value.
+  const float ratio = (k > 1) ? static_cast<float>(data_unique.size()) / (k - 1) : 0.0f;
+  clusters.reserve(k);
+  for (size_t i = 0; i < k; i++) {
+    const float position = i * ratio;
+    // position reaches data_unique.size() for the last centroid, so clamp both neighbours.
+    const size_t floor_index = static_cast<size_t>(std::floor(position));
+    const size_t index = std::min(floor_index, last);
+    const size_t next = std::min(floor_index + 1, last);
+    if (next != index && position > floor_index) {
+      clusters.emplace_back((data_unique[index] + data_unique[next]) / 2);
+    } else {
+      clusters.emplace_back(data_unique[index]);
+    }
+  }
+  return clusters;
+}
+
+// Clusters data[0..elem_count) into k centroids: each of the `epochs` rounds assigns every value to its nearest
+// centroid and then moves each centroid to the mean of the values assigned to it.
+// On success the centroids are stored in quantParam->clusters and the result holds, per element, the centroid
+// index shifted by INT8_MIN so it fits an int8 code. With epochs == 0 the result is empty but centroids are stored.
+// Returns an empty vector and leaves quantParam untouched on invalid arguments or fewer than k distinct values.
+std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam) {
+  std::vector<int8_t> clusters_index{};
+  // An int8 code can address at most 256 centroids.
+  constexpr size_t kMaxClusters = static_cast<size_t>(INT8_MAX - INT8_MIN) + 1;
+  if (data == nullptr || quantParam == nullptr || k == 0 || k > kMaxClusters) {
+    MS_LOG(ERROR) << "KMeans got invalid arguments, k: " << k;
+    return clusters_index;
+  }
+  std::vector<float> clusters = InitClusters(data, elem_count, k);
+  if (clusters.size() < k) {
+    MS_LOG(WARNING) << "The number of unique data is less than K so KMeans function is not executed.";
+    return clusters_index;
+  }
+  clusters_index.reserve(elem_count);
+  for (size_t epoch = 0; epoch < epochs; epoch++) {
+    clusters_index.clear();
+    // Running sum and member count per centroid; sums are kept in double to limit rounding error.
+    std::vector<double> sums(clusters.size(), 0.0);
+    std::vector<size_t> counts(clusters.size(), 0);
+    for (size_t i = 0; i < elem_count; i++) {
+      // Nearest centroid by absolute distance; ties keep the lowest index.
+      size_t index = 0;
+      float min_distance = std::fabs(data[i] - clusters[0]);
+      for (size_t j = 1; j < clusters.size(); j++) {
+        const float distance = std::fabs(data[i] - clusters[j]);
+        if (distance < min_distance) {
+          min_distance = distance;
+          index = j;
+        }
+      }
+      clusters_index.emplace_back(static_cast<int8_t>(static_cast<int>(index) + INT8_MIN));
+      sums[index] += data[i];
+      counts[index]++;
+    }
+    // Move every non-empty centroid to the mean of its members; empty ones stay where they are.
+    for (size_t j = 0; j < clusters.size(); j++) {
+      if (counts[j] > 0) {
+        clusters[j] = static_cast<float>(sums[j] / counts[j]);
+      }
+    }
+  }
+  // update data
+  quantParam->clusters = clusters;
+  return clusters_index;
+}
 }  // namespace quant
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.h b/mindspore/lite/tools/converter/quantizer/quantize_util.h
index ddfc8032a4..c751199cec 100644
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.h
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h
@@ -72,15 +72,9 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl
 STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false,
                              int numBits = UINT8_QUANTIZATION);
 
-bool SearchLowerBound(const std::vector &data, const size_t &index, const float &max_tmp, float *min_tmp,
-                      size_t *min_idx);
+std::pair
OutlierMethod(std::vector min_datas, std::vector max_datas);
 
-bool SearchUpperBound(const std::vector &data, const size_t &index, float *max_tmp, const float &min_tmp,
-                      size_t *max_idx);
-
-float CalPercentile(const std::vector &datas, const int &percent);
-
-std::pair PercentMethod(std::vector min_datas, std::vector max_datas);
+std::vector KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam);  // clusters data into k centroids stored in quantParam->clusters; returns one centroid code (index + INT8_MIN) per element
 
 template
 T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
@@ -213,7 +207,7 @@ STATUS QuantFilter(ParamValueLitePtr weight, std::shared_ptr primiti
             average_raw += raw_data;
           }
         }
-        if (quantType == QuantType_WeightQuant) {
+        if (quantType == QuantType_WeightQuant && quant_param.clusters.size() == 0) {  // the averaging below now also requires that no clusters are set on quant_param
           // mean
           average_dequant = average_dequant / one_filter_size;
           average_raw = average_raw / one_filter_size;
@@ -261,17 +255,21 @@ STATUS QuantFilter(ParamValueLitePtr weight, std::shared_ptr primiti
     }
 
     schema::QuantParamT quant_param;
-    STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum);
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
-      return status;
+    if (quant_param.clusters.size() == 0) {  // NOTE(review): quant_param is declared on the line just above, so clusters looks always empty here - confirm intent
+      STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum);
+      if (status != RET_OK) {
+        MS_LOG(ERROR) << "CalQuantizationParams failed" << status;
+        return status;
+      }
     }
     quant_params.emplace_back(quant_param);
     // update data and datatype
     for (uint32_t i = 0; i < elem_count; i++) {
       float raw_data = raw_datas[i];
-      auto quant_data = QuantizeData(raw_data, quant_param, quant_max, quant_min);
-      quant_datas[i] = quant_data;
+      if (quant_param.clusters.size() == 0) {  // NOTE(review): when clusters are set, quant_datas[i] is not written by this loop - confirm it is filled elsewhere
+        auto quant_data = QuantizeData(raw_data, quant_param, quant_max, quant_min);
+        quant_datas[i] = quant_data;
+      }
     }
     auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(T));
     if (ret != EOK) {