diff --git a/mindspore/lite/src/dequant.cc b/mindspore/lite/src/dequant.cc
index 71b52a1a12..9aee49ab08 100644
--- a/mindspore/lite/src/dequant.cc
+++ b/mindspore/lite/src/dequant.cc
@@ -95,9 +95,6 @@ int DequantUtil::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tenso
 int DequantUtil::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) {
   MS_ASSERT(dst_tensor != nullptr);
-  if (!dst_tensor->IsConst()) {
-    return RET_NO_CHANGE;
-  }
   auto quant_params = src_tensor.quantParams();
   if (quant_params == nullptr || quant_params->size() == 0) {
     return RET_NO_CHANGE;
diff --git a/mindspore/lite/src/dequant.h b/mindspore/lite/src/dequant.h
index a3955eeb3c..66e52f8db4 100644
--- a/mindspore/lite/src/dequant.h
+++ b/mindspore/lite/src/dequant.h
@@ -48,36 +48,19 @@ class DequantUtil {
       MS_LOG(ERROR) << "Malloc failed.";
       return nullptr;
     }
-    if (input_tensor->shape().size() == kPerBatch &&
-        input_tensor->quant_params().size() == static_cast<size_t>(input_tensor->shape().at(0))) {  // per batch matmul
-      auto per_batch_size = input_tensor->shape().at(0);
-      auto quant_param = input_tensor->quant_params();
-      for (int i = 0; i < per_batch_size; i++) {
-        auto param = quant_param.at(i);
-        auto scale = param.scale;
-        auto zero_point = param.zeroPoint;
-        auto matrix_size = input_tensor->ElementsNum() / per_batch_size;
-        for (int64_t j = 0; j < matrix_size; j++) {
-          dequant_datas[i * matrix_size + j] = static_cast<DT>(
-            (quant_datas[i * matrix_size + j] - zero_point) * scale);
-        }
-      }
-    } else if (input_tensor->quant_params().size() != kPerTensor) {
-      auto channels = static_cast<size_t>(input_tensor->Batch());
+    auto quant_param = input_tensor->quant_params();
+    if (quant_param.size() != kPerTensor) {
+      auto shapes = input_tensor->shape();
+      auto channels = quant_param.size();
       if (!channel_first) {
-        if (input_tensor->shape().size() != 2) {
-          MS_LOG(ERROR) << "unexpected shape size: " << input_tensor->shape().size();
+        if (static_cast<int>(shapes.size()) != 2 || shapes[1] != static_cast<int>(channels)) {
+          MS_LOG(ERROR) << "shape size: " << shapes.size() << " quant params size: " << channels;
           free(dequant_datas);
           return nullptr;
         }
-        channels = input_tensor->shape()[1];
-      }
-      if (input_tensor->quant_params().size() != channels) {
-        MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->quant_params().size() << channels;
-        free(dequant_datas);
-        return nullptr;
-      }
-      auto quant_param = input_tensor->quant_params();
+      }
+      size_t per_channel_size = input_tensor->ElementsNum() / channels;
       for (size_t i = 0; i < channels; i++) {
         auto param = quant_param.at(i);
         auto scale = param.scale;
@@ -98,7 +81,6 @@ class DequantUtil {
         }
       }
     } else {
-      auto quant_param = input_tensor->quant_params();
       auto quant_clusters = input_tensor->quant_clusters();
       auto param = quant_param.front();
       auto scale = param.scale;
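The per-channel branch above now derives everything from quant_params(): channels = quant_param.size(), per_channel_size = ElementsNum() / channels, and a channel-last weight must be 2-D with shapes[1] equal to the param count. A minimal standalone sketch of that dequantization rule (QuantParam and the function name are reduced stand-ins, not the MindSpore Lite API; the channel-last index formula is an assumption, since the hunk ends before the inner loop):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Reduced stand-in for the runtime's per-channel quant parameter.
    struct QuantParam {
      double scale;
      int32_t zero_point;
    };

    // One quant param per channel; for a channel-last 2-D weight, element j of
    // channel i is assumed to sit at j * channels + i (hypothetical indexing).
    std::vector<float> DequantPerChannel(const std::vector<int8_t> &quant,
                                         const std::vector<QuantParam> &params,
                                         bool channel_first) {
      const size_t channels = params.size();
      const size_t per_channel_size = quant.size() / channels;
      std::vector<float> out(quant.size());
      for (size_t i = 0; i < channels; ++i) {
        const QuantParam &p = params[i];
        for (size_t j = 0; j < per_channel_size; ++j) {
          const size_t idx = channel_first ? i * per_channel_size + j : j * channels + i;
          out[idx] = static_cast<float>((quant[idx] - p.zero_point) * p.scale);
        }
      }
      return out;
    }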
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index 9b62540fa9..835bc4ba50 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -275,23 +275,14 @@ inline void FreeRestoreTensors(std::map<Tensor *, Tensor *> *restored_origin_ten
   restored_origin_tensors->clear();
 }
 
-inline bool IsChannelFirst(const std::vector<Tensor *> &in_tensors, OpParameter *op_parameter) {
+inline bool IsChannelFirst(int index, OpParameter *op_parameter) {
   MS_ASSERT(op_parameter != nullptr);
   if (op_parameter->type_ == schema::PrimitiveType_MatMul) {
-    for (size_t i = 0; i < in_tensors.size(); i++) {
-      auto tensor = in_tensors.at(i);
-      MS_ASSERT(tensor != nullptr);
-      if (tensor->shape().size() != 2) {
-        continue;
-      }
-      const auto *param = reinterpret_cast<const MatMulParameter *>(op_parameter);
-      if (i == 1) {
-        return !(param->a_transpose_);
-      } else if (i == 2) {
-        return param->b_transpose_;
-      } else {
-        // not care bias data
-      }
-    }
+    const auto *param = reinterpret_cast<const MatMulParameter *>(op_parameter);
+    if (index == 0) {
+      return !(param->a_transpose_);
+    } else if (index == 1) {
+      return param->b_transpose_;
+    }
   }
   return true;
 }
@@ -307,8 +298,9 @@ kernel::LiteKernel *Scheduler::FindCpuKernel(const std::vector<Tensor *> &in_ten
     return nullptr;
   }
   std::map<Tensor *, Tensor *> restored_origin_tensors;
+  int index = 0;
   for (auto &tensor : in_tensors) {
-    auto channel_first = IsChannelFirst(in_tensors, op_parameter);
+    auto channel_first = IsChannelFirst(index++, op_parameter);
     auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kernel_data_type);
     if (restore_tensor != nullptr) {
       restored_origin_tensors[tensor] = restore_tensor;
@@ -385,7 +377,8 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in
   // weight quant
   std::map<Tensor *, Tensor *> restored_origin_tensors;
+  int index = 0;
   for (auto &tensor : in_tensors) {
-    auto channel_first = IsChannelFirst(in_tensors, op_parameter);
+    auto channel_first = IsChannelFirst(index++, op_parameter);
     auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kNumberTypeFloat32);
     if (restore_tensor != nullptr) {
       restored_origin_tensors[tensor] = restore_tensor;
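IsChannelFirst now keys purely off the operand's position and the MatMul transpose flags instead of re-scanning all input tensors on every call (note that the counter must be declared before the loop in both call sites, otherwise it would be reset to 0 on each iteration). A self-contained sketch of the rule (MatMulParameter reduced to the two flags used here; illustration only, not the runtime header):

    #include <cassert>

    // Reduced stand-in for the runtime's MatMulParameter.
    struct MatMulParameter {
      bool a_transpose_;
      bool b_transpose_;
    };

    // Input 0 (A) is channel-first unless A is transposed; input 1 (B) is
    // channel-first only when B is transposed; other operands (e.g. bias)
    // default to channel-first.
    bool IsChannelFirst(int index, const MatMulParameter &param) {
      if (index == 0) {
        return !param.a_transpose_;
      }
      if (index == 1) {
        return param.b_transpose_;
      }
      return true;
    }

    int main() {
      MatMulParameter p{false, false};
      assert(IsChannelFirst(0, p));   // rows of A carry the quant channels
      assert(!IsChannelFirst(1, p));  // channels of B sit on the last axis
      return 0;
    }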
diff --git a/mindspore/lite/src/train/optimizer_kernel.h b/mindspore/lite/src/train/optimizer_kernel.h
index 21d487554d..19d145de97 100644
--- a/mindspore/lite/src/train/optimizer_kernel.h
+++ b/mindspore/lite/src/train/optimizer_kernel.h
@@ -32,7 +32,7 @@ class OptimizerKernel : public LiteKernel {
   ~OptimizerKernel() = default;
   enum class WeightUpdateMode { NORMAL, VIRTUAL_BATCH };
 
-  WeightUpdateMode get_optimizer_mode() { return weightUpdateMod_; }
+  WeightUpdateMode get_optimizer_mode() { return weight_update_mod_; }
 
   int Init() override {
     default_lr_ = reinterpret_cast<float *>(in_tensors_.at(lr_idx_)->MutableData())[0];
@@ -67,6 +67,7 @@ class OptimizerKernel : public LiteKernel {
       }
       valid_grad_sum_ = false;
       std::fill(grad_sum_, grad_sum_ + elem_num, 0);
+      weight_update_mod_ = WeightUpdateMode::VIRTUAL_BATCH;
     } else {
       if (grad_sum_ != nullptr) {
         OptimizerStep();
@@ -74,7 +75,6 @@ class OptimizerKernel : public LiteKernel {
         grad_sum_ = nullptr;
       }
     }
-    weightUpdateMod_ = WeightUpdateMode::VIRTUAL_BATCH;
     return RET_OK;
   }
 
@@ -112,7 +112,7 @@ class OptimizerKernel : public LiteKernel {
   bool valid_grad_sum_ = false;
 
  private:
-  WeightUpdateMode weightUpdateMod_ = WeightUpdateMode::NORMAL;
+  WeightUpdateMode weight_update_mod_ = WeightUpdateMode::NORMAL;
 };
 }  // namespace mindspore::kernel
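Besides renaming weightUpdateMod_ to the style-conforming weight_update_mod_, the hunks above move the mode assignment into the enabling branch, so the kernel no longer flips to VIRTUAL_BATCH when virtual batching is being turned off. A reduced sketch of the corrected control flow (the enclosing method's real name and signature are not visible in the hunks, so SetVirtualBatch here is a hypothetical stand-in):

    enum class WeightUpdateMode { NORMAL, VIRTUAL_BATCH };

    class OptimizerState {
     public:
      WeightUpdateMode get_optimizer_mode() const { return weight_update_mod_; }

      int SetVirtualBatch(bool enable) {
        if (enable) {
          // ... allocate and zero the gradient accumulation buffer ...
          weight_update_mod_ = WeightUpdateMode::VIRTUAL_BATCH;  // only set here now
        } else {
          // ... apply accumulated gradients and free the buffer; before the fix
          // the mode was overwritten with VIRTUAL_BATCH on this path as well ...
        }
        return 0;
      }

     private:
      WeightUpdateMode weight_update_mod_ = WeightUpdateMode::NORMAL;
    };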
diff --git a/mindspore/lite/test/models_mindspore_mixbit.cfg b/mindspore/lite/test/models_mindspore_mixbit.cfg
index d1c66733f8..1fd0d6a857 100644
--- a/mindspore/lite/test/models_mindspore_mixbit.cfg
+++ b/mindspore/lite/test/models_mindspore_mixbit.cfg
@@ -1 +1 @@
-#efficientnet.mindir 41.37 9.98
+mobilenetv2.r1.1.mindir 13 4
diff --git a/mindspore/lite/test/models_mindspore_weightquant.cfg b/mindspore/lite/test/models_mindspore_weightquant.cfg
index f954cf8ce8..06f82aa89a 100644
--- a/mindspore/lite/test/models_mindspore_weightquant.cfg
+++ b/mindspore/lite/test/models_mindspore_weightquant.cfg
@@ -1,3 +1 @@
-#retinaface_732_1280_iod.mindir 16.9
-#mobilefacenet_iod.mindir 13.5
-#effnet_iod.mindir
+ssd.r1.1.mindir 1.3
diff --git a/mindspore/lite/test/models_tf_weightquant.cfg b/mindspore/lite/test/models_tf_weightquant.cfg
new file mode 100644
index 0000000000..420e818e40
--- /dev/null
+++ b/mindspore/lite/test/models_tf_weightquant.cfg
@@ -0,0 +1 @@
+ml_video_edit_enhance.pb 22
\ No newline at end of file
diff --git a/mindspore/lite/test/models_tflite_weightquant.cfg b/mindspore/lite/test/models_tflite_weightquant.cfg
index c8cc31ed40..c1562c588a 100644
--- a/mindspore/lite/test/models_tflite_weightquant.cfg
+++ b/mindspore/lite/test/models_tflite_weightquant.cfg
@@ -1,2 +1,3 @@
 ml_face_openclose.tflite 0.5
 hiai_ghostnet.tflite 4.7
+siteAI_digcom_AI_ECN.tflite 22
\ No newline at end of file
diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh
index 7922f8d4d7..cc8ae97233 100755
--- a/mindspore/lite/test/run_benchmark_nets.sh
+++ b/mindspore/lite/test/run_benchmark_nets.sh
@@ -173,7 +173,7 @@ function Run_Converter() {
         fi
         model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
         echo ${model_name} >> "${run_converter_log_file}"
-        echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'--quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}"
+        echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}"
         ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0
         if [ $? = 0 ]; then
             converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
@@ -190,8 +190,8 @@ function Run_Converter() {
         fi
         model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
         echo ${model_name} >> "${run_converter_log_file}"
-        echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}' --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}"
-        ./converter_lite --fmk=MINDIR --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16
+        echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0' >> "${run_converter_log_file}"
+        ./converter_lite --fmk=MINDIR --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0
         if [ $? = 0 ]; then
             converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
         else
@@ -199,6 +199,23 @@ function Run_Converter() {
         fi
     done < ${models_mindspore_weightquant_config}
 
+    # Convert tf weightquant models:
+    while read line; do
+        weight_quant_line_info=${line}
+        if [[ $weight_quant_line_info == \#* ]]; then
+            continue
+        fi
+        model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
+        echo ${model_name} >> "${run_converter_log_file}"
+        echo './converter_lite --fmk=TF --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}"
+        ./converter_lite --fmk=TF --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0
+        if [ $? = 0 ]; then
+            converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
+        else
+            converter_result='converter weight_quant '${model_name}' failed';echo ${converter_result} >> ${run_converter_result_file};return 1
+        fi
+    done < ${models_tf_weightquant_config}
+
     # Convert mindir mixbit weightquant models:
     while read line; do
         line_info=${line}
@@ -208,16 +225,16 @@ function Run_Converter() {
         model_name=`echo ${line_info}|awk -F ' ' '{print $1}'`
 
         echo ${model_name}'_7bit' >> "${run_converter_log_file}"
-        echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_7bit --quantType=WeightQuant --bitNum=7 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}"
-        ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_7bit' --quantType=WeightQuant --bitNum=7 --quantWeightSize=500 --quantWeightChannel=16
+        echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_7bit --quantType=WeightQuant --bitNum=7 --quantWeightSize=0 --quantWeightChannel=0' >> "${run_converter_log_file}"
+        ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_7bit' --quantType=WeightQuant --bitNum=7 --quantWeightSize=0 --quantWeightChannel=0
         if [ $? = 0 ]; then
             converter_result='converter mindspore '${model_name}'_7bit pass';echo ${converter_result} >> ${run_converter_result_file}
         else
             converter_result='converter mindspore '${model_name}'_7bit failed';echo ${converter_result} >> ${run_converter_result_file};return 1
         fi
         echo ${model_name}'_9bit' >> "${run_converter_log_file}"
-        echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_9bit --quantType=WeightQuant --bitNum=9 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}"
-        ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_9bit' --quantType=WeightQuant --bitNum=9 --quantWeightSize=500 --quantWeightChannel=16
+        echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_9bit --quantType=WeightQuant --bitNum=9 --quantWeightSize=0 --quantWeightChannel=0' >> "${run_converter_log_file}"
+        ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_9bit' --quantType=WeightQuant --bitNum=9 --quantWeightSize=0 --quantWeightChannel=0
         if [ $? = 0 ]; then
             converter_result='converter mindspore '${model_name}'_9bit pass';echo ${converter_result} >> ${run_converter_result_file}
         else
@@ -572,12 +589,30 @@ function Run_x86() {
         echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}"
         ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit}>> "${run_x86_log_file}"
         if [ $? = 0 ]; then
-            run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
+            run_result='x86: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
-            run_result='x86: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+            run_result='x86: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
     done < ${models_tflite_weightquant_config}
 
+    # Run tf weightquant converted models:
+    while read line; do
+        weight_quant_line_info=${line}
+        if [[ $weight_quant_line_info == \#* ]]; then
+            continue
+        fi
+        model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
+        accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
+        echo ${model_name} >> "${run_x86_log_file}"
+        echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_log_file}"
+        ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}"
+        if [ $? = 0 ]; then
+            run_result='x86: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
+        else
+            run_result='x86: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+        fi
+    done < ${models_tf_weightquant_config}
+
     # Run mindir weight quantization converted models:
     while read line; do
         weight_quant_line_info=${line}
@@ -590,9 +625,9 @@ function Run_x86() {
         echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}"
         ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
-            run_result='x86: '${model_name}'[weight_quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
+            run_result='x86: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
-            run_result='x86: '${model_name}'[weight_quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+            run_result='x86: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
     done < ${models_mindspore_weightquant_config}
 
@@ -845,9 +880,9 @@ function Run_x86_sse() {
         echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_sse_log_file}"
         ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}"
         if [ $? = 0 ]; then
-            run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
+            run_result='x86_sse: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
-            run_result='x86_sse: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+            run_result='x86_sse: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
     done < ${models_tflite_weightquant_config}
 
@@ -863,9 +898,9 @@ function Run_x86_sse() {
         echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_sse_log_file}"
         ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}"
         if [ $? = 0 ]; then
-            run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
+            run_result='x86_sse: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
-            run_result='x86_sse: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+            run_result='x86_sse: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
     done < ${models_mindspore_weightquant_config}
 
@@ -1135,9 +1170,9 @@ function Run_x86_avx() {
         echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_avx_log_file}"
         ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_avx_log_file}"
         if [ $? = 0 ]; then
-            run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
+            run_result='x86_avx: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
-            run_result='x86_avx: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+            run_result='x86_avx: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
     done < ${models_mindspore_weightquant_config}
 
@@ -1566,9 +1601,9 @@ function Run_arm64() {
         echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --loopCount=1 --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt
         adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_fp32_log_file}"
         if [ $? = 0 ]; then
-            run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file}
+            run_result='arm64: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
         else
-            run_result='arm64: '${model_name}'[weightQuant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
+            run_result='arm64: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
         fi
     done < ${models_mindspore_weightquant_config}
 
@@ -2147,6 +2182,7 @@
 models_npu_config=${basepath}/models_npu.cfg
 models_compatibility_config=${basepath}/models_compatibility.cfg
 models_with_multiple_inputs_config=${basepath}/models_with_multiple_inputs.cfg
 models_for_process_only_config=${basepath}/models_for_process_only.cfg
+models_tf_weightquant_config=${basepath}/models_tf_weightquant.cfg
 
 ms_models_path=${basepath}/ms_models
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc
index 4e910bd76d..7671bc41be 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/infershape_pass.cc
@@ -137,6 +137,7 @@ STATUS NodeInferShape(const std::unique_ptr<schema::CNodeT> &node, const std::ve
     MS_LOG(ERROR) << "parameter is nullptr.";
     return RET_ERROR;
   }
+  parameter->quant_type_ = node->quantType;
   parameter->infer_flag_ = true;
   auto ret = KernelInferShape(inputs, outputs, parameter);
   fbb.Clear();
diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.cc b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
index b799c972b0..682b07d52e 100644
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.cc
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.cc
@@ -947,4 +947,48 @@ void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_coun
   *desired_min = min;
 }
 
+int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first) {
+  auto channels = dims[0];
+  if (!(*channel_at_first)) {
+    if (dims.size() != 2) {
+      MS_LOG(WARNING) << "unexpected dims size: " << dims.size();
+      *channel_at_first = true;
+    } else {
+      channels = dims[1];
+    }
+  } else {
+    channels = channel_cnt == -1 ? channels : channel_cnt;
+  }
+  return channels;
+}
+
+void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
+                       int *channel_cnt) {
+  if (primitive->name() == ops::kNameMatMul && static_cast<int>(shapes.size()) == 2) {
+    auto matmul_prim = primitive->cast<std::shared_ptr<ops::MatMul>>();
+    MS_ASSERT(matmul_prim != nullptr);
+    *channel_at_first =
+      index != 1 || (matmul_prim->GetAttr(ops::kTransposeB) != nullptr && matmul_prim->get_transpose_b());
+  } else if (primitive->name() == ops::kNameLSTM) {
+    if (index == 1 || index == 2) {
+      if (shapes.size() != 3) {
+        MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
+      } else {
+        *channel_cnt = shapes[0] * shapes[1];
+      }
+    } else if (index == 3) {
+      if (shapes.size() != 2) {
+        MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
+      } else {
+        auto tensor_elem_cnt = shapes[0] * shapes[1];
+        if (tensor_elem_cnt / 4 * 4 == tensor_elem_cnt) {
+          *channel_cnt = 4;
+        }
+      }
+    } else {
+      MS_LOG(WARNING) << "unexpected index of lstm: " << index;
+    }
+  }
+}
+
 }  // namespace mindspore::lite::quant
diff --git a/mindspore/lite/tools/converter/quantizer/quantize_util.h b/mindspore/lite/tools/converter/quantizer/quantize_util.h
index d20d46a588..cb56f80c2b 100644
--- a/mindspore/lite/tools/converter/quantizer/quantize_util.h
+++ b/mindspore/lite/tools/converter/quantizer/quantize_util.h
@@ -28,6 +28,7 @@
 #include
 #include
 #include "ops/mat_mul.h"
+#include "ops/lstm.h"
 #include "ops/fusion/full_connection.h"
 #include "tools/converter/quantizer/quantizer.h"
 #include "include/errorcode.h"
@@ -113,6 +114,11 @@ STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_data
 void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
                          bool channel_at_first, float *desired_max, float *desired_min);
 
+int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first);
+
+void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
+                       int *channel_cnt);
+
 template <typename T>
 T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
   MS_ASSERT(quantParam != nullptr);
@@ -169,19 +175,12 @@ template <typename T>
 STATUS DoPerChannelQuant(const tensor::TensorPtr &weight, const QuantType &quant_type,
                          std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
                          const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas,
-                         std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true) {
+                         std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true,
+                         int channel_cnt = -1) {
   auto dims = weight->shape();
   size_t elem_count = weight->DataSize();
   auto *raw_datas = static_cast<float *>(weight->data_c());
-  auto channels = dims[0];
-  if (!channel_at_first) {
-    if (dims.size() != 2) {
-      MS_LOG(ERROR) << "unexpected dims size: " << dims.size();
-      channel_at_first = true;
-    } else {
-      channels = dims[1];
-    }
-  }
+  auto channels = CalChannels(dims, channel_cnt, &channel_at_first);
   if (channels == 0) {
     MS_LOG(ERROR) << "channels is zero";
     return RET_ERROR;
@@ -358,15 +357,11 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv
   int ret = RET_OK;
   if (per_channel) {
     bool channel_at_first = true;
-    if (primitive->name() == ops::kNameMatMul && weight->shape().size() == 2) {
-      auto matmul_prim = primitive->cast<std::shared_ptr<ops::MatMul>>();
-      MS_ASSERT(matmul_prim != nullptr);
-      channel_at_first =
-        index != 1 || (matmul_prim->GetAttr(ops::kTransposeB) != nullptr && matmul_prim->get_transpose_b());
-    }
+    int channel_cnt = -1;
+    CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt);
     // channel at first
     ret = DoPerChannelQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data,
-                               &dequant_datas, quant_data_type, channel_at_first);
+                               &dequant_datas, quant_data_type, channel_at_first, channel_cnt);
     if (ret == RET_CONTINUE) {
       return ret;
     } else if (ret != RET_OK) {
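CalChannels and CalQuantAssitInfo factor the channel bookkeeping out of DoPerChannelQuant: the count defaults to dim 0 (dim 1 for a channel-last 2-D weight), and an explicit channel_cnt overrides it. That override is what lets 3-D LSTM gate weights be quantized with shapes[0] * shapes[1] channels, and the 2-D gate-fused bias fall back to 4 channels when its element count divides by 4. A compilable sketch of just the counting rule (ShapeVector reduced to std::vector<int64_t>):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Mirrors the added CalChannels.
    int CalChannels(const std::vector<int64_t> &dims, int channel_cnt, bool *channel_at_first) {
      int channels = static_cast<int>(dims[0]);
      if (!(*channel_at_first)) {
        if (dims.size() != 2) {
          *channel_at_first = true;  // unexpected rank: fall back to channel-first
        } else {
          channels = static_cast<int>(dims[1]);
        }
      } else {
        channels = channel_cnt == -1 ? channels : channel_cnt;  // explicit override wins
      }
      return channels;
    }

    int main() {
      bool first = true;
      // 3-D LSTM gate weight: CalQuantAssitInfo passes shapes[0] * shapes[1] as
      // the override, so every gate row gets its own scale.
      std::vector<int64_t> lstm_w{2, 4 * 16, 32};
      printf("%d\n", CalChannels(lstm_w, static_cast<int>(lstm_w[0] * lstm_w[1]), &first));  // 128
      // Channel-last 2-D weight (e.g. a non-transposed MatMul B): dim 1 counts.
      bool last = false;
      std::vector<int64_t> matmul_b{32, 10};
      printf("%d\n", CalChannels(matmul_b, -1, &last));  // 10
      return 0;
    }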
diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
index a6523951a2..bf9f0ea632 100644
--- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
@@ -126,71 +126,47 @@ STATUS WeightQuantizer::DoConvQuantize(const CNodePtr &cnode) {
 }
 
 STATUS WeightQuantizer::DoMulQuantize(const CNodePtr &cnode) {
-  auto already_quant = false;
-  tensor::TensorPtr tensor_info = nullptr;
-  ParameterPtr param_node = nullptr;
-  int index = 0;
   for (size_t i = 1; i < cnode->size(); i++) {
     auto inputNode = cnode->input(i);
     if (inputNode->isa<Parameter>()) {
-      param_node = inputNode->cast<ParameterPtr>();
+      auto param_node = inputNode->cast<ParameterPtr>();
       if ((param_node != nullptr) && param_node->has_default()) {
-        tensor_info = std::static_pointer_cast<tensor::Tensor>(param_node->default_param());
-        if ((tensor_info == nullptr) || (tensor_info->Size() == 0) || (tensor_info->data_c() == nullptr)) {
-          tensor_info = nullptr;
-          continue;
-        } else if (tensor_info->data_type() == mindspore::kNumberTypeInt8 ||
-                   tensor_info->data_type() == mindspore::kNumberTypeInt16) {
-          MS_LOG(INFO) << "the node: " << cnode->fullname_with_scope() << " input_i: " << i << "has been "
-                       << " quantized";
-          already_quant = true;
-          break;
-        } else if (tensor_info->data_type() != mindspore::kNumberTypeFloat32) {
-          tensor_info = nullptr;
-          continue;
-        } else {
-          index = i;
-          break;
+        auto tensor_info = std::static_pointer_cast<tensor::Tensor>(param_node->default_param());
+        if ((tensor_info != nullptr) && (tensor_info->data_type() == mindspore::kNumberTypeFloat32) &&
+            (tensor_info->Size() > 0) && (tensor_info->data_c() != nullptr)) {
+          auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0));
+          if (primitive == nullptr) {
+            MS_LOG(ERROR) << "primitive is nullptr";
+            return RET_ERROR;
+          }
+
+          auto status = RET_ERROR;
+          auto per_channel = true;
+          if (i == 3) {
+            per_channel = false;
+          }
+          if (type_id_ == kNumberTypeInt8) {
+            status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_,
+                                         bit_num_, per_channel, type_id_, i - 1);
+          } else if (type_id_ == kNumberTypeInt16) {
+            status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_,
+                                          bit_num_, per_channel, type_id_, i - 1);
+          }
+          if (status == RET_CONTINUE) {
+            continue;
+          } else if (status != RET_OK) {
+            MS_LOG(ERROR) << cnode->fullname_with_scope() << " input " << i << " QuantFilter failed : " << status;
+            return status;
+          }
+          status = SetAbstract(tensor_info, param_node, primitive);
+          if (status != RET_OK) {
+            MS_LOG(ERROR) << cnode->fullname_with_scope() << " input " << i << " SetAbstract failed : " << status;
+            return RET_ERROR;
+          }
         }
       }
     }
   }
-
-  if (already_quant) {
-    return RET_OK;
-  }
-
-  if (tensor_info == nullptr) {
-    MS_LOG(WARNING) << cnode->fullname_with_scope() << " No valid input param node !";
-    return RET_OK;
-  }
-
-  auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0));
-  if (primitive == nullptr) {
-    MS_LOG(ERROR) << "primitive is nullptr";
-    return RET_ERROR;
-  }
-
-  auto status = RET_ERROR;
-  if (type_id_ == kNumberTypeInt8) {
-    status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true,
-                                 type_id_, index - 1);
-  } else if (type_id_ == kNumberTypeInt16) {
-    status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true,
-                                  type_id_, index - 1);
-  }
-  if (status == RET_CONTINUE) {
-    return RET_OK;
-  } else if (status != RET_OK) {
-    MS_LOG(ERROR) << "QuantFilter failed : " << status;
-    return status;
-  }
-  status = SetAbstract(tensor_info, param_node, primitive);
-  if (status != RET_OK) {
-    MS_LOG(ERROR) << "SetAbstract failed : " << status;
-    return RET_ERROR;
-  }
-
   return RET_OK;
 }
@@ -290,11 +266,11 @@ STATUS WeightQuantizer::ProcessLstmWeightByIndex(const CNodePtr &cnode, const Pr
   }
   auto status = RET_ERROR;
   if (type_id_ == kNumberTypeInt8) {
-    status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false,
+    status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true,
                                  type_id_, index - 1);
   } else if (type_id_ == kNumberTypeInt16) {
-    status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_,
-                                  false, type_id_, index - 1);
+    status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true,
+                                  type_id_, index - 1);
   }
   if (status == RET_CONTINUE) {
     return RET_OK;
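The rewritten DoMulQuantize no longer stops after the first usable weight: every constant float32 input is quantized in place, per-channel except operand 3, and RET_CONTINUE from QuantFilter just moves on to the next input. The companion change in ProcessLstmWeightByIndex flips the same per_channel argument from false to true, since CalQuantAssitInfo now supplies the LSTM channel count. A reduced sketch of the new loop shape (plain structs replace the MindSpore node and tensor types; QuantizeWeight is a hypothetical stand-in for QuantFilter<int8_t>/<int16_t>):

    #include <cstddef>
    #include <vector>

    // Stand-in for a cnode operand that may hold a quantizable weight.
    struct Operand {
      bool is_const = false;
      bool is_float32 = false;
      std::vector<float> data;
    };

    // Stand-in for QuantFilter: real code rewrites data as int8/int16 plus scales.
    static bool QuantizeWeight(Operand *w, bool per_channel, int weight_index) {
      (void)per_channel;
      (void)weight_index;
      return !w->data.empty();
    }

    // Operand 0 is the primitive, so i starts at 1; unusable inputs are skipped
    // instead of aborting the whole node.
    int QuantizeAllConstInputs(std::vector<Operand> *operands) {
      for (std::size_t i = 1; i < operands->size(); ++i) {
        Operand *w = &operands->at(i);
        if (!w->is_const || !w->is_float32 || w->data.empty()) {
          continue;
        }
        const bool per_channel = (i != 3);  // operand 3 is quantized per-layer
        if (!QuantizeWeight(w, per_channel, static_cast<int>(i) - 1)) {
          return -1;
        }
      }
      return 0;
    }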