From 09084d4b306898e69ef4b8ebf0d299e212704712 Mon Sep 17 00:00:00 2001 From: xutianchun Date: Mon, 17 Aug 2020 16:25:39 +0800 Subject: [PATCH] PerChannel Post Quantization --- mindspore/lite/src/ir/primitive_t_value.h | 5 +- mindspore/lite/src/param_value_lite.h | 15 -- .../lite/tools/anf_exporter/anf_exporter.cc | 22 -- .../anf_importer/import_from_meta_graphT.cc | 22 +- mindspore/lite/tools/converter/converter.cc | 1 - .../tools/converter/quantizer/CMakeLists.txt | 1 - .../quantizer/post_training_quantizer.cc | 118 ++++----- .../quantizer/post_training_quantizer.h | 8 +- .../converter/quantizer/quantize_util.cc | 241 +++++++++++------- .../tools/converter/quantizer/quantize_util.h | 16 +- .../converter/quantizer/weight_quantizer.cc | 150 ----------- .../converter/quantizer/weight_quantizer.h | 53 ---- 12 files changed, 231 insertions(+), 421 deletions(-) delete mode 100644 mindspore/lite/tools/converter/quantizer/weight_quantizer.cc delete mode 100644 mindspore/lite/tools/converter/quantizer/weight_quantizer.h diff --git a/mindspore/lite/src/ir/primitive_t_value.h b/mindspore/lite/src/ir/primitive_t_value.h index 170715ed2c..c3363014ab 100644 --- a/mindspore/lite/src/ir/primitive_t_value.h +++ b/mindspore/lite/src/ir/primitive_t_value.h @@ -47,7 +47,6 @@ class PrimitiveTValue : public Value { } } - void SetInputQuantParam(const std::vector> &input_quant_param) { this->input_quant_param_ = input_quant_param; } @@ -56,6 +55,10 @@ class PrimitiveTValue : public Value { this->output_quant_param_ = output_quant_param; } + void ClearInputOutputQuantParam() { + input_quant_param_.clear(); + output_quant_param_.clear(); + } void AddInputQuantParam(std::vector quant_param) { this->input_quant_param_.emplace_back(quant_param); diff --git a/mindspore/lite/src/param_value_lite.h b/mindspore/lite/src/param_value_lite.h index ba4094584c..907e70a2d0 100644 --- a/mindspore/lite/src/param_value_lite.h +++ b/mindspore/lite/src/param_value_lite.h @@ -25,16 +25,6 @@ #include 
"ir/dtype/type_id.h" namespace mindspore { -struct AnfQuantParam { - double scale; - int32_t zeroPoint; - double min; - double max; - bool narrowRange; - bool inited; - int32_t numBits; - AnfQuantParam() : scale(1.0), zeroPoint(0), min(0.0), max(0.0), narrowRange(false), numBits(8), inited(false) {} -}; class ParamValueLite : public Value { public: ParamValueLite() : tensor_addr_(nullptr), tensor_size_(0) {} @@ -59,10 +49,6 @@ class ParamValueLite : public Value { } return size; } - std::vector> &quant_param() { return quant_params_; } - void set_quant_param(std::unique_ptr &quant_param) { - quant_params_.emplace_back(std::move(quant_param)); - } bool operator==(const Value &other) const override { return this == &other; @@ -73,7 +59,6 @@ class ParamValueLite : public Value { size_t tensor_size_; std::vector tensor_shape_; TypeId type_id_; - std::vector> quant_params_; }; using ParamValueLitePtr = std::shared_ptr; diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.cc b/mindspore/lite/tools/anf_exporter/anf_exporter.cc index fe8012e6db..68bf5ff4b7 100644 --- a/mindspore/lite/tools/anf_exporter/anf_exporter.cc +++ b/mindspore/lite/tools/anf_exporter/anf_exporter.cc @@ -159,16 +159,6 @@ int AnfExporter::ConvertQuantParam(const std::unique_ptr &me primitive->GetPrimitiveT()->value.AsQuantDTypeCast()->dstT == kNumberTypeFloat32)) { tensor_output->dataType = kNumberTypeInt8; } - // // TensorType - // valuePtr = primitive->GetAttr(kInputTensorDataType); - // if (valuePtr != nullptr) { - // MS_LOG(INFO) << "node: " << node->name << " input tensor data - // type: " << GetValue(valuePtr); for (auto input : - // node->inputIndex) { - // auto tensor = subGraph->allTensors[input].get(); - // tensor->dataType = kNumberTypeUInt8; - // } - // } } return RET_OK; } @@ -295,18 +285,6 @@ int AnfExporter::ConvertInputParameter(const std::shared_ptr input_anod paramTensor->nodeType = schema::NodeType_ValueNode; paramTensor->data.resize(paramValue->tensor_size()); 
memcpy(paramTensor->data.data(), paramValue->tensor_addr(), paramValue->tensor_size()); - for (auto &ite : paramValue->quant_param()) { - auto quantPar = std::make_unique(); - quantPar->scale = ite->scale; - quantPar->zeroPoint = ite->zeroPoint; - quantPar->min = ite->zeroPoint; - quantPar->max = ite->max; - quantPar->narrowRange = ite->narrowRange; - quantPar->inited = ite->inited; - quantPar->numBits = ite->numBits; - paramTensor->quantParams.emplace_back(std::move(quantPar)); - paramTensor->dataType = paramValue->tensor_type(); - } } node_id_map_[paramNode->fullname_with_scope()] = meta_graphT->allTensors.size(); output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); diff --git a/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc b/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc index 44b67dca56..cc3b1b6662 100644 --- a/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc +++ b/mindspore/lite/tools/anf_importer/import_from_meta_graphT.cc @@ -61,17 +61,17 @@ int AnfImporterFromMetaGraphT::ConverterConstTensor() { param_value->set_tensor_addr(tensor_data); param_value->set_tensor_size(size); } - if (!tensor->quantParams.empty()) { - std::unique_ptr quantParam = std::make_unique(); - quantParam->scale = tensor->quantParams[0]->scale; - quantParam->zeroPoint = tensor->quantParams[0]->zeroPoint; - quantParam->min = tensor->quantParams[0]->min; - quantParam->max = tensor->quantParams[0]->max; - quantParam->narrowRange = tensor->quantParams[0]->narrowRange; - quantParam->numBits = tensor->quantParams[0]->numBits; - quantParam->inited = tensor->quantParams[0]->inited; - param_value->set_quant_param(quantParam); - } +// if (!tensor->quantParams.empty()) { +// std::unique_ptr quantParam = std::make_unique(); +// quantParam->scale = tensor->quantParams[0]->scale; +// quantParam->zeroPoint = tensor->quantParams[0]->zeroPoint; +// quantParam->min = tensor->quantParams[0]->min; +// quantParam->max = tensor->quantParams[0]->max; 
+// quantParam->narrowRange = tensor->quantParams[0]->narrowRange; +// quantParam->numBits = tensor->quantParams[0]->numBits; +// quantParam->inited = tensor->quantParams[0]->inited; +// param_value->set_quant_param(quantParam); +// } parameter->set_default_param(param_value); AddNode(i, parameter); } diff --git a/mindspore/lite/tools/converter/converter.cc b/mindspore/lite/tools/converter/converter.cc index 8ff14c519b..b386ede35f 100644 --- a/mindspore/lite/tools/converter/converter.cc +++ b/mindspore/lite/tools/converter/converter.cc @@ -31,7 +31,6 @@ #include "tools/anf_exporter/anf_exporter.h" #include "tools/anf_importer/import_from_protobuf.h" #include "tools/converter/parser/onnx/onnx.pb.h" -#include "tools/converter/quantizer/weight_quantizer.h" #include "tools/converter/quantizer/post_training_quantizer.h" #include "tools/converter/quantizer/quant_cast.h" diff --git a/mindspore/lite/tools/converter/quantizer/CMakeLists.txt b/mindspore/lite/tools/converter/quantizer/CMakeLists.txt index 009fd01ae5..59082b4413 100644 --- a/mindspore/lite/tools/converter/quantizer/CMakeLists.txt +++ b/mindspore/lite/tools/converter/quantizer/CMakeLists.txt @@ -7,7 +7,6 @@ add_library(quantizer_mid OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/calc_quant_param.cc ${CMAKE_CURRENT_SOURCE_DIR}/quantizer.cc ${CMAKE_CURRENT_SOURCE_DIR}/aware_quantizer.cc - ${CMAKE_CURRENT_SOURCE_DIR}/weight_quantizer.cc ${CMAKE_CURRENT_SOURCE_DIR}/quantize_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/general_bitpacking.cc ${CMAKE_CURRENT_SOURCE_DIR}/post_training_quantizer.cc diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc index 83431dc609..09aa9ef679 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc @@ -510,7 +510,6 @@ STATUS PostTrainingQuantizer::DoQuantInput(double scale, int zeropoint, struct M 
quant_param.narrowRange = false; std::vector quant_params = {quant_param}; lite_primitive->AddInputQuantParam(quant_params); - // p->AddAttr("quant_input_dataType", MakeValue((int)DataType_DT_FLOAT)); return RET_OK; } @@ -528,51 +527,67 @@ STATUS PostTrainingQuantizer::DoQuantOutput(double scale, int zeropoint, struct quant_param.narrowRange = false; std::vector quant_params = {quant_param}; lite_primitive->AddOutputQuantParam(quant_params); - // p->AddAttr("quant_output_dataType", MakeValue((int)DataType_DT_FLOAT)); return RET_OK; } -STATUS PostTrainingQuantizer::DoWeightQuant(AnfNodePtr node) { +STATUS PostTrainingQuantizer::DoWeightQuant(AnfNodePtr weight, std::shared_ptr primitiveT_value, + bool depthwise) { // const vector dims = filter->dims; // perlayer - if (!node->isa()) { + if (!weight->isa()) { MS_LOG(ERROR) << "not a parameter"; return RET_PARAM_INVALID; } - auto parameter = std::dynamic_pointer_cast(node); + auto parameter = std::dynamic_pointer_cast(weight); ParamValueLitePtr paramValue = std::dynamic_pointer_cast(parameter->default_param()); - auto status = QuantFilter(paramValue, QuantType_PostTraining, quant_max, quant_min, bit_num, per_channel_); + auto status = QuantFilter(paramValue, primitiveT_value, QuantType_PostTraining, quant_max, quant_min, bit_num, + per_channel_, depthwise); if (status != RET_OK) { MS_LOG(ERROR) << "QuantFilter failed: " << status; return status; } + // set dtype + auto abstractBase = parameter->abstract(); + if (abstractBase == nullptr) { + MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << parameter->name(); + return RET_ERROR; + } + if (!utils::isa(abstractBase)) { + MS_LOG(ERROR) << "Abstract of parameter should be abstract tensor, " << parameter->name(); + return RET_ERROR; + } + auto abstractTensor = utils::cast(abstractBase); + abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt8)); return RET_OK; } -STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr input, AnfNodePtr weight, AnfNodePtr 
bias) { - if (input == nullptr || weight == nullptr || bias == nullptr) { +STATUS PostTrainingQuantizer::DoBiasQuant(AnfNodePtr bias, std::shared_ptr primitiveT_value) { + if (primitiveT_value == nullptr || bias == nullptr) { MS_LOG(ERROR) << "null pointer!"; return RET_NULL_PTR; } - ParameterPtr weightParameterPtr = std::dynamic_pointer_cast(weight); - auto default_param = weightParameterPtr->default_param(); - auto weight_param = std::dynamic_pointer_cast(default_param); - // std::vector> weight_quant_params = weight_param->get_quant_params(); - - ParameterPtr biasParameterPtr = std::dynamic_pointer_cast(bias); - auto bias_default_param = biasParameterPtr->default_param(); + auto bias_parameter_ptr = std::dynamic_pointer_cast(bias); + auto bias_default_param = bias_parameter_ptr->default_param(); auto bias_param = std::dynamic_pointer_cast(bias_default_param); + auto active_weight_quant_params = primitiveT_value->GetInputQuantParams(); + if (active_weight_quant_params.size() != 2) { + MS_LOG(ERROR) << "unexpected active_weight_quant_params size: " << active_weight_quant_params.size(); + return RET_ERROR; + } + + auto active_params = active_weight_quant_params[0]; + auto weight_params = active_weight_quant_params[1]; + vector input_scales; vector filter_scales; vector bias_scales; - auto quant_params = input->GetInputQuantParams(); - size_t sizeX = quant_params.size(); + size_t sizeX = active_params.size(); for (size_t i = 0; i < sizeX; i++) { - input_scales.emplace_back(quant_params[i].front().scale); + input_scales.emplace_back(active_params[i].scale); } - size_t sizeY = weight_param->quant_param().size(); + size_t sizeY = weight_params.size(); if (sizeX != sizeY) { if (sizeX > 1 && sizeY > 1) { MS_LOG(ERROR) << "input and filter's scale count cannot match!"; @@ -580,8 +595,7 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr input } } for (size_t i = 0; i < sizeY; i++) { - auto scale = weight_param->quant_param()[i]->scale; - 
filter_scales.push_back(scale); + filter_scales.emplace_back(weight_params[i].scale); } size_t size = std::max(sizeX, sizeY); for (size_t i = 0; i < size; i++) { @@ -593,20 +607,22 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr input size_t shape_size = bias_param->tensor_shape_size(); // set bias quant param - bias_param->quant_param().clear(); + vector quant_params; for (size_t i = 0; i < bias_scales.size(); i++) { - std::unique_ptr param(new (std::nothrow) AnfQuantParam()); - param->scale = bias_scales[i]; - param->zeroPoint = 0; - bias_param->quant_param().emplace_back(std::move(param)); + schema::QuantParamT quant_param; + quant_param.scale = bias_scales[i]; + quant_param.zeroPoint = 0; + quant_param.inited = true; + quant_params.emplace_back(quant_param); } + primitiveT_value->AddInputQuantParam(quant_params); // quant bias data int32_t *quant_datas = new (std::nothrow) int32_t[shape_size]; if (quant_datas == nullptr) { MS_LOG(ERROR) << "null pointer dereferencing."; return RET_NULL_PTR; } - float *raw_datas = reinterpret_cast(bias_param->tensor_addr()); + float *raw_datas = static_cast(bias_param->tensor_addr()); double bias_scale_tmp; for (size_t i = 0; i < shape_size; i++) { if (bias_scales.size() == 1) { @@ -625,38 +641,21 @@ STATUS PostTrainingQuantizer::DoBiasQuant(std::shared_ptr input return RET_ERROR; } delete[] quant_datas; - bias_param->set_tensor_type(kNumberTypeInt32); + // set dtype + auto abstractBase = bias_parameter_ptr->abstract(); + if (abstractBase == nullptr) { + MS_LOG(ERROR) << "Abstract of parameter is nullptr, " << bias_parameter_ptr->name(); + return RET_ERROR; + } + if (!utils::isa(abstractBase)) { + MS_LOG(ERROR) << "Abstract of parameter should be abstract tensor, " << bias_parameter_ptr->name(); + return RET_ERROR; + } + auto abstractTensor = utils::cast(abstractBase); + abstractTensor->element()->set_type(TypeIdToType(kNumberTypeInt32)); return RET_OK; } -// STATUS 
PostTrainingQuantizer::reformatConvWeight(GraphDefT *graph) { -// for (auto &subGraph : graphDefT->subgraphs) { -// for (auto iter = subGraph->nodes.begin(); iter != subGraph->nodes.end(); iter++) { -// OpDefT *node = (*iter).get(); -// bool isConv = false; -// kTransFilterType tansType; -// if ((*node).attr.type == OpT_Conv2D) { -// tansType = kKCHW2HWCK; -// isConv = true; -// } -// else if ((*node).attr.type == OpT_DepthwiseConv2D) { -// tansType = kCKHW2HWCK; -// isConv = true; -// } -// if (isConv) { -// auto status = TransFilterFormat(&(*subGraph.get()->allTensors.at(node->inputIndex[1])), -// tansType); -// if (status != RET_OK) { -// return status; -// } -// TensorDefT *weight = subGraph->allTensors.at(node->inputIndex[1]).get(); -// weight->format = Format_HWCK; -// PostBitPack(weight, bitNum); -// } -// } -// } -//} - STATUS PostTrainingQuantizer::QuantNode() { auto input_min_max = this->calibrator_->GetMinMax(this->calibrator_->GetInputDivergInfo()); auto input_scale = this->calibrator_->GetResult(this->calibrator_->GetInputDivergInfo()); @@ -682,7 +681,7 @@ STATUS PostTrainingQuantizer::QuantNode() { primitiveT_value->SetQuantType(schema::QuantType_QUANT_NONE); continue; } - auto input_vec = cnode->inputs(); + primitiveT_value->ClearInputOutputQuantParam(); auto op_name = cnode->fullname_with_scope(); auto op_type = primitiveT_value->GetPrimitiveT()->value.type; MS_LOG(INFO) << "OpName: " << op_name; @@ -711,11 +710,12 @@ STATUS PostTrainingQuantizer::QuantNode() { DoQuantInput(scale, convInputzeropoint, &input_min_max[cnode], primitiveT_value); // do weight quant auto weight = cnode->input(2); - DoWeightQuant(weight); + bool depthwise = op_type == PrimitiveType_DeDepthwiseConv2D; + DoWeightQuant(weight, primitiveT_value, depthwise); // do bias quant if (cnode->inputs().size() == 4) { auto bias = cnode->input(3); - DoBiasQuant(primitiveT_value, weight, bias); + DoBiasQuant(bias, primitiveT_value); } } // do output quant diff --git 
a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h index d9e16e1f84..09780440e2 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.h @@ -65,8 +65,8 @@ class PostTrainingQuantizer : public Quantizer { STATUS DoQuantize(FuncGraphPtr funcGraph) override; size_t bit_num; - int quant_max{127}; - int quant_min{-128}; + int quant_max{INT8_MAX}; + int quant_min{INT8_MIN}; private: bool per_channel_; @@ -96,9 +96,9 @@ class PostTrainingQuantizer : public Quantizer { STATUS DoQuantInput(double scale, int32_t zeropoint, struct MaxMin *max_min, std::shared_ptr); STATUS DoQuantOutput(double scale, int32_t zeropoint, struct MaxMin *max_min, std::shared_ptr); - STATUS DoWeightQuant(AnfNodePtr node); + STATUS DoWeightQuant(AnfNodePtr weight, std::shared_ptr primitiveT_value, bool depthwise); - STATUS DoBiasQuant(std::shared_ptr input, AnfNodePtr weight, AnfNodePtr bias); + STATUS DoBiasQuant(AnfNodePtr bias, std::shared_ptr primitiveT_value); }; struct DivergInfo; diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc index e7e37b41e7..4c2746412e 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc @@ -99,7 +99,9 @@ bool QuantStrategy::CanOpPostQuantized(AnfNodePtr &node) const { schema::PrimitiveType_Nchw2Nhwc, schema::PrimitiveType_Nhwc2Nchw, schema::PrimitiveType_Conv2D, schema::PrimitiveType_DepthwiseConv2D, schema::PrimitiveType_Add, schema::PrimitiveType_Pooling, - schema::PrimitiveType_Concat, /*schema::PrimitiveType_SoftMax,*/ schema::PrimitiveType_Reshape, + schema::PrimitiveType_Concat, /*schema::PrimitiveType_SoftMax,*/ + schema::PrimitiveType_Reshape, /*schema::PrimitiveType_FullConnection,*/ + schema::PrimitiveType_MatMul, 
schema::PrimitiveType_Activation}; return IsContain(uint8OpList, type); } @@ -191,7 +193,7 @@ void CalFakeNode(const AnfNodePtr &inTensor) { // } } -STATUS CalQuantizationParams(std::unique_ptr &quantParam, double mMin, double mMax, bool narrowRange, +STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, int quant_min, int num_bits) { MS_ASSERT(quantParam != nullptr); if (mMin > 0.0f) { @@ -306,133 +308,178 @@ STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, doubl return RET_OK; } -STATUS QuantFilter(ParamValueLitePtr &weightPtr, QuantType quantType, int quant_max, int quant_min, size_t bitNum, - bool per_channel) { - auto dims = weightPtr->tensor_shape(); - if (dims.size() != 4) { - MS_LOG(ERROR) << "weight dims size error: " << dims.size() << " Back to per layer."; - per_channel = false; - } else { - uint32_t channels = dims[0]; - if (channels == 0) { - MS_LOG(ERROR) << "channels is 0"; - return RET_ERROR; +STATUS QuantFilter(ParamValueLitePtr weight, std::shared_ptr primitiveT_value, QuantType quantType, + int quant_max, int quant_min, size_t bitNum, bool per_channel, bool depth_wise) { + auto dims = weight->tensor_shape(); + if (per_channel) { + if (dims.size() != 4) { + MS_LOG(ERROR) << "weight dims size error: " << dims.size() << " Back to per layer."; + per_channel = false; + } else { + uint32_t channels = dims[0]; + if (channels == 0) { + MS_LOG(ERROR) << "channels is 0"; + return RET_ERROR; + } } } + vector quant_params; + size_t elem_count = weight->tensor_shape_size(); + auto *raw_datas = static_cast(weight->tensor_addr()); + if (raw_datas == nullptr) { + MS_LOG(ERROR) << "rawDatas is nullptr"; + return RET_ERROR; + } + vector quant_datas(elem_count); + if (per_channel) { // notice: // at now for tflite model, Conv2D's weight format is KHWC, so is DepthwiseConv2D // if TransWeightFormat is done before PostTraingingQuantization, the DepthwiseCon2D's weight is CHWK 
- size_t shapeSize = weightPtr->tensor_shape_size(); - auto channels = dims[0]; - size_t oneFilterSize = shapeSize / channels; - auto *rawDatas = reinterpret_cast(weightPtr->tensor_addr()); - if (rawDatas == nullptr) { - MS_LOG(ERROR) << "rawDatas is nullptr"; - return RET_ERROR; - } - - float min = FLT_MAX; - float max = -FLT_MAX; - weightPtr->quant_param().clear(); - vector qDatas(shapeSize); - - for (uint32_t i = 0; i < channels; i++) { - // find min and max - for (uint32_t j = 0; j < oneFilterSize; j++) { - auto index = j + i * channels; - if (index >= shapeSize) { - MS_LOG(ERROR) << "over flow!"; - return RET_ERROR; + if (depth_wise) { + // channel at last + auto channels = dims[3]; + if (channels == 0) { + MS_LOG(ERROR) << "channels is zero"; + return RET_ERROR; + } + size_t one_filter_size = elem_count / channels; + + for (uint32_t i = 0; i < channels; i++) { + float min = FLT_MAX; + float max = -FLT_MAX; + // find min and max + for (uint32_t j = 0; j < one_filter_size; j++) { + auto index = i + j * channels; + if (index >= elem_count) { + MS_LOG(ERROR) << "over flow!"; + return RET_ERROR; + } + min = std::min(min, raw_datas[index]); + max = std::max(max, raw_datas[index]); + } + schema::QuantParamT quant_param; + STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum); + if (status != RET_OK) { + MS_LOG(ERROR) << "CalQuantizationParams failed" << status; + return status; } - min = std::min(min, rawDatas[index]); - max = std::max(max, rawDatas[index]); + quant_params.emplace_back(quant_param); + // do quantization + for (uint32_t j = 0; j < one_filter_size; j++) { + auto index = i + j * channels; + if (index >= elem_count) { + MS_LOG(ERROR) << "over flow!"; + return RET_ERROR; + } + float raw_data = raw_datas[index]; + auto quant_data = QuantizeData(raw_data, quant_param, quant_max, quant_min); + quant_datas[index] = quant_data; + } + } + auto ret = memcpy_s(const_cast(raw_datas), weight->tensor_size(), 
quant_datas.data(), + elem_count * sizeof(int8_t)); + if (ret != EOK) { + MS_LOG(ERROR) << "memcpy error: " << ret; + return RET_ERROR; } - std::unique_ptr quantParam = std::unique_ptr(new AnfQuantParam); - STATUS status = CalQuantizationParams(quantParam, min, max, false, quant_max, quant_min, bitNum); - if (status != RET_OK) { - MS_LOG(ERROR) << "CalQuantizationParams failed" << status; - return status; + if (quantType == QuantType_WeightQuant) { + PostBitPack(const_cast(raw_datas), elem_count, bitNum); } - // do quantization - for (uint32_t j = 0; j < oneFilterSize; j++) { - auto index = j + i * channels; - if (index >= shapeSize) { - MS_LOG(ERROR) << "over flow!"; - return RET_ERROR; + + weight->set_tensor_size(elem_count * sizeof(int8_t)); + } else { + // channel at first + auto channels = dims[0]; + if (channels == 0) { + MS_LOG(ERROR) << "channels is zero"; + return RET_ERROR; + } + size_t one_filter_size = elem_count / channels; + + for (uint32_t i = 0; i < channels; i++) { + float min = FLT_MAX; + float max = -FLT_MAX; + // find min and max + for (uint32_t j = 0; j < one_filter_size; j++) { + auto index = j + i * one_filter_size; + if (index >= elem_count) { + MS_LOG(ERROR) << "over flow!"; + return RET_ERROR; + } + min = std::min(min, raw_datas[index]); + max = std::max(max, raw_datas[index]); + } + schema::QuantParamT quant_param; + STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum); + if (status != RET_OK) { + MS_LOG(ERROR) << "CalQuantizationParams failed" << status; + return status; + } + quant_params.emplace_back(quant_param); + // do quantization + for (uint32_t j = 0; j < one_filter_size; j++) { + auto index = j + i * one_filter_size; + if (index >= elem_count) { + MS_LOG(ERROR) << "over flow!"; + return RET_ERROR; + } + float raw_data = raw_datas[index]; + auto quant_data = QuantizeData(raw_data, quant_param, quant_max, quant_min); + quant_datas[index] = quant_data; } - float rawData = 
rawDatas[index]; - auto qData = QuantizeData(rawData, quantParam.get(), quant_max, quant_min); - qDatas[index] = qData; } - weightPtr->set_quant_param(quantParam); - } - auto ret = - memcpy_s(const_cast(rawDatas), weightPtr->tensor_size(), qDatas.data(), shapeSize * sizeof(int8_t)); - if (ret != EOK) { - MS_LOG(ERROR) << "memcpy error: " << ret; - return RET_ERROR; - } - if (quantType == QuantType_WeightQuant) { - PostBitPack(const_cast(rawDatas), shapeSize, bitNum); + auto ret = + memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(int8_t)); + if (ret != EOK) { + MS_LOG(ERROR) << "memcpy error: " << ret; + return RET_ERROR; + } + if (quantType == QuantType_WeightQuant) { + PostBitPack(const_cast(raw_datas), elem_count, bitNum); + } + weight->set_tensor_size(elem_count * sizeof(int8_t)); } - weightPtr->set_tensor_type(kNumberTypeInt8); - weightPtr->set_tensor_size(shapeSize * sizeof(int8_t)); } else { // per layer - size_t shapeSize = weightPtr->tensor_shape_size(); - auto *rawDatas = static_cast(weightPtr->tensor_addr()); - if (rawDatas == nullptr) { - MS_LOG(ERROR) << "rawDatas is nullptr"; - return RET_ERROR; - } - - weightPtr->quant_param().clear(); - vector qDatas(shapeSize); - - float min = 0; - float max = 0; - for (uint32_t i = 0; i < shapeSize; i++) { + float min = FLT_MAX; + float max = -FLT_MAX; + for (uint32_t i = 0; i < elem_count; i++) { // find max min - min = std::min(min, rawDatas[i]); - max = std::max(max, rawDatas[i]); + min = std::min(min, raw_datas[i]); + max = std::max(max, raw_datas[i]); } - std::unique_ptr quantParam = std::unique_ptr(new AnfQuantParam); - STATUS status = CalQuantizationParams(quantParam, min, max, false, quant_max, quant_min, bitNum); + schema::QuantParamT quant_param; + STATUS status = CalQuantizationParams(&quant_param, min, max, false, quant_max, quant_min, bitNum); if (status != RET_OK) { MS_LOG(ERROR) << "CalQuantizationParams failed" << status; return status; } + 
quant_params.emplace_back(quant_param); // update data and datatype - for (uint32_t i = 0; i < shapeSize; i++) { - float rawData = rawDatas[i]; - auto quant_data = std::round(rawData / quantParam->scale + quantParam->zeroPoint); - if (quant_data > quant_max) { - qDatas[i] = quant_max; - } else if (quant_data < quant_min) { - qDatas[i] = quant_min; - } else { - qDatas[i] = static_cast(quant_data); - } + for (uint32_t i = 0; i < elem_count; i++) { + float raw_data = raw_datas[i]; + auto quant_data = QuantizeData(raw_data, quant_param, quant_max, quant_min); + quant_datas[i] = quant_data; } - - weightPtr->set_quant_param(quantParam); - auto ret = memcpy_s(rawDatas, weightPtr->tensor_size(), qDatas.data(), shapeSize * sizeof(int8_t)); + auto ret = memcpy_s(raw_datas, weight->tensor_size(), quant_datas.data(), elem_count * sizeof(int8_t)); if (ret != EOK) { MS_LOG(ERROR) << "memcpy error: " << ret; return RET_ERROR; } if (quantType == QuantType_WeightQuant) { - PostBitPack(rawDatas, shapeSize, bitNum); + PostBitPack(raw_datas, elem_count, bitNum); } - - weightPtr->set_tensor_type(kNumberTypeInt8); - weightPtr->set_tensor_size(shapeSize * sizeof(int8_t)); + weight->set_tensor_size(elem_count * sizeof(int8_t)); } - + if (quant_params.empty()) { + MS_LOG(ERROR) << "quant_params empty"; + return RET_ERROR; + } + primitiveT_value->AddInputQuantParam(quant_params); return RET_OK; } diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.h b/mindspore/lite/tools/converter/quantizer/quantize_util.h index 5277c7124d..7ce67af8bb 100644 --- a/mindspore/lite/tools/converter/quantizer/quantize_util.h +++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h @@ -29,6 +29,7 @@ #include "ir/primitive.h" #include "abstract/dshape.h" #include "mindspore/lite/tools/converter/quantizer/quantizer.h" +#include "mindspore/lite/src/ir/primitive_t_value.h" namespace mindspore { namespace lite { @@ -58,7 +59,7 @@ class QuantStrategy { static const std::array mMulTypes; }; 
-STATUS CalQuantizationParams(std::unique_ptr &quantParam, double mMin, double mMax, +STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange, int quant_max, int quant_min, int num_bits); STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, @@ -97,12 +98,12 @@ T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { } template -T QuantizeData(float originData, const AnfQuantParam *quantParam, int quant_max, int quant_min) { +T QuantizeData(float originData, const schema::QuantParamT &quantParam, int quant_max, int quant_min) { MS_ASSERT(quantParam != nullptr); MS_ASSERT(quantParam->inited); - const auto scale = quantParam->scale; - const int zeroPoint = quantParam->zeroPoint; - const auto narrowRange = quantParam->narrowRange; + const auto scale = quantParam.scale; + const int zeroPoint = quantParam.zeroPoint; + const auto narrowRange = quantParam.narrowRange; const int maxLimit = quant_max; const int minLimit = quant_min; @@ -119,8 +120,9 @@ T QuantizeData(float originData, const AnfQuantParam *quantParam, int quant_max, void CalFakeNode(const AnfNodePtr &inTensor); -STATUS QuantFilter(ParamValueLitePtr &weightPtr, QuantType quantType, int quant_max, int quant_min, - size_t bitNum = UINT8_QUANTIZATION, bool per_channel = false); +STATUS QuantFilter(ParamValueLitePtr weight, std::shared_ptr primitiveT_value, QuantType quantType, + int quant_max, int quant_min, size_t bitNum = UINT8_QUANTIZATION, bool per_channel = false, + bool depth_wise = false); STATUS PostBitPack(float *weights, size_t shapeSize, size_t bitNum = UINT8_QUANTIZATION); } // namespace quant diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc deleted file mode 100644 index 54d796f667..0000000000 --- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc +++ /dev/null @@ -1,150 +0,0 @@ -/** - * Copyright 
2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "tools/converter/quantizer/weight_quantizer.h" -#include -#include -#include "src/common/common.h" -#include "ir/dtype/type_id.h" - -using std::string; -using std::vector; - -namespace mindspore { -namespace lite { -namespace quant { -WeightQuantizer::WeightQuantizer(FuncGraphPtr graph, const string &weightSize, - const std::string &convWeightChannelThreshold, const std::string &bitNum) - : Quantizer(graph) { - auto quantSize = static_cast(std::stoull(weightSize)); - this->bitNum = static_cast(std::stoull(bitNum)); - auto convQuantWeightChannelThreshold = static_cast(std::stoull(convWeightChannelThreshold)); - // TODO(...): update stractory - mStrategy.reset(new QuantStrategy(quantSize, convQuantWeightChannelThreshold)); -} - -// uint32_t GetConvChannel(TensorDefT *weight) { -// uint32_t channel = 0; -// const vector dims = weight->dims; - -// switch (weight->format) { -// case Format_NCHW: -// case Format_KCHW: -// case Format_NC4HW4: -// channel = static_cast(dims[NCHW_N]); -// break; -// case Format_NHWC: -// case Format_HWKC: -// channel = static_cast(dims[NHWC_N]); -// break; -// case Format_HWCK: -// channel = static_cast(dims[HWCK_K]); -// break; -// case Format_CKHW: -// channel = static_cast(dims[CKHW_K]); -// break; -// default: -// MS_LOGE("Unsupported format: %d", weight->format); -// return 0; -// } -// return channel; -// } - -STATUS 
WeightQuantizer::DoConvQuantize(const std::list &nodes) { - for (auto &cnode : nodes) { - if (!mStrategy->CanConvOpQuantized(cnode)) { - continue; - } - - auto inputNode = cnode->input(2); - if (!inputNode->isa()) { - return RET_ERROR; - } - - auto paramNode = inputNode->cast(); - if (!paramNode->has_default()) { - return RET_ERROR; - } - - ParamValueLitePtr paramValue = std::static_pointer_cast(paramNode->default_param()); - auto status = QuantFilter(paramValue, QuantType_WeightQuant, 127, -128, bitNum); - if (status != RET_OK) { - MS_LOG(ERROR) << "QuantFilter failed : " << status; - return status; - } - } - - return RET_OK; -} - -STATUS WeightQuantizer::DoMulQuantize(const std::list &nodes) { - for (auto &node : nodes) { - if (!mStrategy->CanMulOpQuantized(node)) { - continue; - } - - ParamValueLitePtr paramValue = nullptr; - for (size_t i = 1; i < node->size(); i++) { - auto inputNode = node->input(i); - if (inputNode->isa() == true) { - auto paramNode = inputNode->cast(); - if ((paramNode != nullptr) && (paramNode->has_default() == true)) { - paramValue = std::static_pointer_cast(paramNode->default_param()); - if ((paramValue == nullptr) || (paramValue->tensor_size() == 0) - || (paramValue->tensor_shape().size() != 4) - || (paramValue->tensor_addr() == nullptr) - || (paramValue->tensor_type() != mindspore::kNumberTypeFloat32)) { - paramValue = nullptr; - continue; - } else { - break; - } - } - } - } - if (paramValue == nullptr) { - MS_LOG(ERROR) << "No valid input param node !"; - continue; - } - auto status = QuantFilter(paramValue, QuantType_WeightQuant, 127, -128, bitNum); - if (status != RET_OK) { - MS_LOG(ERROR) << "QunatFilter failed" << status; - return RET_ERROR; - } - } - - return RET_OK; -} - -STATUS WeightQuantizer::DoQuantize(FuncGraphPtr funcGraph) { - auto ret = RET_OK; - auto cnodes = funcGraph->GetOrderedCnodes(); - ret = DoConvQuantize(cnodes); - if (ret != RET_OK) { - MS_LOG(ERROR) << "DoConvQuantize failed :" << ret; - return ret; - } - ret 
= DoMulQuantize(cnodes); - if (ret != RET_OK) { - MS_LOG(ERROR) << "DoMulQuantize failed :" << ret; - return ret; - } - return ret; -} -} // namespace quant -} // namespace lite -} // namespace mindspore - diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h b/mindspore/lite/tools/converter/quantizer/weight_quantizer.h deleted file mode 100644 index 0726dd3df1..0000000000 --- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.h +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef WEIGHT_QUANTIZER_H -#define WEIGHT_QUANTIZER_H - -#include -#include -#include -#include "tools/converter/quantizer/quantizer.h" -#include "tools/converter/quantizer/quantize_util.h" -#include "ir/func_graph.h" -#include "ir/anf.h" -#include "include/model.h" -#include "base/base.h" -#include "abstract/dshape.h" - -namespace mindspore { -namespace lite { -namespace quant { -class WeightQuantizer : public Quantizer { - public: - WeightQuantizer(FuncGraphPtr graph, const std::string& weightSize, - const std::string& covWeightChannelThreshold, const std::string& bitNum); - - ~WeightQuantizer() = default; - - STATUS DoQuantize(FuncGraphPtr funcGraph) override; - STATUS DoConvQuantize(const std::list &nodes); - STATUS DoMulQuantize(const std::list &nodes); - - private: - std::unique_ptr mStrategy; - size_t bitNum; -}; -} // namespace quant -} // namespace lite -} // namespace mindspore -#endif -