!14546 lstm per channel quant

From: @xutianchun
Reviewed-by: @HilbertDavid,@zhanghaibo5
Signed-off-by: @HilbertDavid
pull/14546/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 1dbc4e9eaf

@ -95,9 +95,6 @@ int DequantUtil::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tenso
int DequantUtil::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { int DequantUtil::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) {
MS_ASSERT(dst_tensor != nullptr); MS_ASSERT(dst_tensor != nullptr);
if (!dst_tensor->IsConst()) {
return RET_NO_CHANGE;
}
auto quant_params = src_tensor.quantParams(); auto quant_params = src_tensor.quantParams();
if (quant_params == nullptr || quant_params->size() == 0) { if (quant_params == nullptr || quant_params->size() == 0) {
return RET_NO_CHANGE; return RET_NO_CHANGE;

@ -48,36 +48,19 @@ class DequantUtil {
MS_LOG(ERROR) << "Malloc failed."; MS_LOG(ERROR) << "Malloc failed.";
return nullptr; return nullptr;
} }
if (input_tensor->shape().size() == kPerBatch &&
input_tensor->quant_params().size() == static_cast<size_t>(input_tensor->shape().at(0))) { // per batch matmul
auto per_batch_size = input_tensor->shape().at(0);
auto quant_param = input_tensor->quant_params(); auto quant_param = input_tensor->quant_params();
for (int i = 0; i < per_batch_size; i++) { if (quant_param.size() != kPerTensor) {
auto param = quant_param.at(i); auto shapes = input_tensor->shape();
auto scale = param.scale; auto channels = quant_param.size();
auto zero_point = param.zeroPoint;
auto matrix_size = input_tensor->ElementsNum() / per_batch_size;
for (int64_t j = 0; j < matrix_size; j++) {
dequant_datas[i * matrix_size + j] = static_cast<DT>((quant_datas[i * matrix_size + j] - zero_point) * scale);
}
}
} else if (input_tensor->quant_params().size() != kPerTensor) {
auto channels = static_cast<size_t>(input_tensor->Batch());
if (!channel_first) { if (!channel_first) {
if (input_tensor->shape().size() != 2) { if (static_cast<int>(shapes.size()) != 2 || shapes[1] != static_cast<int>(channels)) {
MS_LOG(ERROR) << "unexpected shape size: " << input_tensor->shape().size(); MS_LOG(ERROR) << "shape size: " << shapes.size() << " quant params size: " << channels;
free(dequant_datas); free(dequant_datas);
return nullptr; return nullptr;
} }
channels = input_tensor->shape()[1];
}
if (input_tensor->quant_params().size() != channels) {
MS_LOG(ERROR) << "Quant param not equal channel num " << input_tensor->quant_params().size() << channels;
free(dequant_datas);
return nullptr;
} }
size_t per_channel_size = input_tensor->ElementsNum() / channels; size_t per_channel_size = input_tensor->ElementsNum() / channels;
auto quant_param = input_tensor->quant_params();
for (size_t i = 0; i < channels; i++) { for (size_t i = 0; i < channels; i++) {
auto param = quant_param.at(i); auto param = quant_param.at(i);
auto scale = param.scale; auto scale = param.scale;
@ -98,7 +81,6 @@ class DequantUtil {
} }
} }
} else { } else {
auto quant_param = input_tensor->quant_params();
auto quant_clusters = input_tensor->quant_clusters(); auto quant_clusters = input_tensor->quant_clusters();
auto param = quant_param.front(); auto param = quant_param.front();
auto scale = param.scale; auto scale = param.scale;

@ -275,23 +275,14 @@ inline void FreeRestoreTensors(std::map<Tensor *, Tensor *> *restored_origin_ten
restored_origin_tensors->clear(); restored_origin_tensors->clear();
} }
inline bool IsChannelFirst(const std::vector<Tensor *> &in_tensors, OpParameter *op_parameter) { inline bool IsChannelFirst(int index, OpParameter *op_parameter) {
MS_ASSERT(op_parameter != nullptr); MS_ASSERT(op_parameter != nullptr);
if (op_parameter->type_ == schema::PrimitiveType_MatMul) { if (op_parameter->type_ == schema::PrimitiveType_MatMul) {
for (size_t i = 0; i < in_tensors.size(); i++) {
auto tensor = in_tensors.at(i);
MS_ASSERT(tensor != nullptr);
if (tensor->shape().size() != 2) {
continue;
}
const auto *param = reinterpret_cast<MatMulParameter *>(op_parameter); const auto *param = reinterpret_cast<MatMulParameter *>(op_parameter);
if (i == 1) { if (index == 0) {
return !(param->a_transpose_); return !(param->a_transpose_);
} else if (i == 2) { } else if (index == 1) {
return param->b_transpose_; return param->b_transpose_;
} else {
// not care bias data
}
} }
} }
return true; return true;
@ -307,8 +298,9 @@ kernel::LiteKernel *Scheduler::FindCpuKernel(const std::vector<Tensor *> &in_ten
return nullptr; return nullptr;
} }
std::map<Tensor *, Tensor *> restored_origin_tensors; std::map<Tensor *, Tensor *> restored_origin_tensors;
int index = 0;
for (auto &tensor : in_tensors) { for (auto &tensor : in_tensors) {
auto channel_first = IsChannelFirst(in_tensors, op_parameter); auto channel_first = IsChannelFirst(index++, op_parameter);
auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kernel_data_type); auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kernel_data_type);
if (restore_tensor != nullptr) { if (restore_tensor != nullptr) {
restored_origin_tensors[tensor] = restore_tensor; restored_origin_tensors[tensor] = restore_tensor;
@ -385,7 +377,8 @@ kernel::LiteKernel *Scheduler::FindBackendKernel(const std::vector<Tensor *> &in
// weight quant // weight quant
std::map<Tensor *, Tensor *> restored_origin_tensors; std::map<Tensor *, Tensor *> restored_origin_tensors;
for (auto &tensor : in_tensors) { for (auto &tensor : in_tensors) {
auto channel_first = IsChannelFirst(in_tensors, op_parameter); int index = 0;
auto channel_first = IsChannelFirst(index++, op_parameter);
auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kNumberTypeFloat32); auto *restore_tensor = DequantUtil::DequantTensor(tensor, desc.data_type, channel_first, kNumberTypeFloat32);
if (restore_tensor != nullptr) { if (restore_tensor != nullptr) {
restored_origin_tensors[tensor] = restore_tensor; restored_origin_tensors[tensor] = restore_tensor;

@ -32,7 +32,7 @@ class OptimizerKernel : public LiteKernel {
~OptimizerKernel() = default; ~OptimizerKernel() = default;
enum class WeightUpdateMode { NORMAL, VIRTUAL_BATCH }; enum class WeightUpdateMode { NORMAL, VIRTUAL_BATCH };
WeightUpdateMode get_optimizer_mode() { return weightUpdateMod_; } WeightUpdateMode get_optimizer_mode() { return weight_update_mod_; }
int Init() override { int Init() override {
default_lr_ = reinterpret_cast<float *>(in_tensors_.at(lr_idx_)->MutableData())[0]; default_lr_ = reinterpret_cast<float *>(in_tensors_.at(lr_idx_)->MutableData())[0];
@ -67,6 +67,7 @@ class OptimizerKernel : public LiteKernel {
} }
valid_grad_sum_ = false; valid_grad_sum_ = false;
std::fill(grad_sum_, grad_sum_ + elem_num, 0); std::fill(grad_sum_, grad_sum_ + elem_num, 0);
weight_update_mod_ = WeightUpdateMode::VIRTUAL_BATCH;
} else { } else {
if (grad_sum_ != nullptr) { if (grad_sum_ != nullptr) {
OptimizerStep(); OptimizerStep();
@ -74,7 +75,6 @@ class OptimizerKernel : public LiteKernel {
grad_sum_ = nullptr; grad_sum_ = nullptr;
} }
} }
weightUpdateMod_ = WeightUpdateMode::VIRTUAL_BATCH;
return RET_OK; return RET_OK;
} }
@ -112,7 +112,7 @@ class OptimizerKernel : public LiteKernel {
bool valid_grad_sum_ = false; bool valid_grad_sum_ = false;
private: private:
WeightUpdateMode weightUpdateMod_ = WeightUpdateMode::NORMAL; WeightUpdateMode weight_update_mod_ = WeightUpdateMode::NORMAL;
}; };
} // namespace mindspore::kernel } // namespace mindspore::kernel

@ -1 +1 @@
#efficientnet.mindir 41.37 9.98 mobilenetv2.r1.1.mindir 13 4

@ -1,3 +1 @@
#retinaface_732_1280_iod.mindir 16.9 ssd.r1.1.mindir 1.3
#mobilefacenet_iod.mindir 13.5
#effnet_iod.mindir

@ -0,0 +1 @@
ml_video_edit_enhance.pb 22

@ -1,2 +1,3 @@
ml_face_openclose.tflite 0.5 ml_face_openclose.tflite 0.5
hiai_ghostnet.tflite 4.7 hiai_ghostnet.tflite 4.7
siteAI_digcom_AI_ECN.tflite 22

@ -173,7 +173,7 @@ function Run_Converter() {
fi fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
echo ${model_name} >> "${run_converter_log_file}" echo ${model_name} >> "${run_converter_log_file}"
echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'--quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}" echo './converter_lite --fmk=TFLITE --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}_weightquant'--quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}"
./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0 ./converter_lite --fmk=TFLITE --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0
if [ $? = 0 ]; then if [ $? = 0 ]; then
converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file} converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
@ -190,8 +190,8 @@ function Run_Converter() {
fi fi
model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'` model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
echo ${model_name} >> "${run_converter_log_file}" echo ${model_name} >> "${run_converter_log_file}"
echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}' --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}" echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}' --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0' >> "${run_converter_log_file}"
./converter_lite --fmk=MINDIR --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=500 --quantWeightChannel=16 ./converter_lite --fmk=MINDIR --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightSize=0 --quantWeightChannel=0
if [ $? = 0 ]; then if [ $? = 0 ]; then
converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file} converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
else else
@ -199,6 +199,23 @@ function Run_Converter() {
fi fi
done < ${models_mindspore_weightquant_config} done < ${models_mindspore_weightquant_config}
# Convert tf weightquant models:
# Reads ${models_tf_weightquant_config} line by line, skipping lines that start with '#'.
# The first whitespace-separated field of each line is the model file name. Each model is
# converted with 8-bit weight quantization and written to ${ms_models_path}/<model>_weightquant.
# The first conversion failure is recorded in ${run_converter_result_file} and aborts with `return 1`.
while read line; do
    weight_quant_line_info=${line}
    if [[ $weight_quant_line_info == \#* ]]; then
        continue
    fi
    model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
    echo ${model_name} >> "${run_converter_log_file}"
    # Log exactly the command that is executed below (including the quantization flags) so the
    # run can be reproduced from the log file.
    echo './converter_lite --fmk=TF --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0' >> "${run_converter_log_file}"
    ./converter_lite --fmk=TF --modelFile=$models_path/${model_name} --outputFile=${ms_models_path}/${model_name}_weightquant --quantType=WeightQuant --bitNum=8 --quantWeightChannel=0
    if [ $? = 0 ]; then
        converter_result='converter weight_quant '${model_name}' pass';echo ${converter_result} >> ${run_converter_result_file}
    else
        converter_result='converter weight_quant '${model_name}' failed';echo ${converter_result} >> ${run_converter_result_file};return 1
    fi
done < ${models_tf_weightquant_config}
# Convert mindir mixbit weightquant models: # Convert mindir mixbit weightquant models:
while read line; do while read line; do
line_info=${line} line_info=${line}
@ -208,16 +225,16 @@ function Run_Converter() {
model_name=`echo ${line_info}|awk -F ' ' '{print $1}'` model_name=`echo ${line_info}|awk -F ' ' '{print $1}'`
echo ${model_name}'_7bit' >> "${run_converter_log_file}" echo ${model_name}'_7bit' >> "${run_converter_log_file}"
echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_7bit --quantType=WeightQuant --bitNum=7 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}" echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_7bit --quantType=WeightQuant --bitNum=7 --quantWeightSize=0 --quantWeightChannel=0' >> "${run_converter_log_file}"
./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_7bit' --quantType=WeightQuant --bitNum=7 --quantWeightSize=500 --quantWeightChannel=16 ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_7bit' --quantType=WeightQuant --bitNum=7 --quantWeightSize=0 --quantWeightChannel=0
if [ $? = 0 ]; then if [ $? = 0 ]; then
converter_result='converter mindspore '${model_name}'_7bit pass';echo ${converter_result} >> ${run_converter_result_file} converter_result='converter mindspore '${model_name}'_7bit pass';echo ${converter_result} >> ${run_converter_result_file}
else else
converter_result='converter mindspore '${model_name}'_7bit failed';echo ${converter_result} >> ${run_converter_result_file};return 1 converter_result='converter mindspore '${model_name}'_7bit failed';echo ${converter_result} >> ${run_converter_result_file};return 1
fi fi
echo ${model_name}'_9bit' >> "${run_converter_log_file}" echo ${model_name}'_9bit' >> "${run_converter_log_file}"
echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_9bit --quantType=WeightQuant --bitNum=9 --quantWeightSize=500 --quantWeightChannel=16' >> "${run_converter_log_file}" echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}' --outputFile='${ms_models_path}'/'${model_name}'_9bit --quantType=WeightQuant --bitNum=9 --quantWeightSize=0 --quantWeightChannel=0' >> "${run_converter_log_file}"
./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_9bit' --quantType=WeightQuant --bitNum=9 --quantWeightSize=500 --quantWeightChannel=16 ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name} --outputFile=${ms_models_path}/${model_name}'_9bit' --quantType=WeightQuant --bitNum=9 --quantWeightSize=0 --quantWeightChannel=0
if [ $? = 0 ]; then if [ $? = 0 ]; then
converter_result='converter mindspore '${model_name}'_9bit pass';echo ${converter_result} >> ${run_converter_result_file} converter_result='converter mindspore '${model_name}'_9bit pass';echo ${converter_result} >> ${run_converter_result_file}
else else
@ -572,12 +589,30 @@ function Run_x86() {
echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}" echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}"
./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit}>> "${run_x86_log_file}" ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit}>> "${run_x86_log_file}"
if [ $? = 0 ]; then if [ $? = 0 ]; then
run_result='x86: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} run_result='x86: '${model_name}_weightquant' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else else
run_result='x86: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 run_result='x86: '${model_name}_weightquant' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi fi
done < ${models_tflite_weightquant_config} done < ${models_tflite_weightquant_config}
# Run tf weightquant converted models:
# Reads ${models_tf_weightquant_config} line by line, skipping lines that start with '#'.
# Field 1 of each line is the model name, field 2 is the accuracy threshold handed to benchmark.
# Each <model>_weightquant.ms is benchmarked against the reference input/output data; the first
# failure is recorded in ${run_benchmark_result_file} and aborts with `return 1`.
while read line; do
    weight_quant_line_info=${line}
    if [[ $weight_quant_line_info == \#* ]]; then
        continue
    fi
    model_name=`echo ${weight_quant_line_info}|awk -F ' ' '{print $1}'`
    accuracy_limit=`echo ${weight_quant_line_info}|awk -F ' ' '{print $2}'`
    echo ${model_name} >> "${run_x86_log_file}"
    # Log exactly the command that is executed below: the _weightquant model file, the accuracy
    # threshold, and no --inputShapes (this loop never sets or passes input shapes).
    echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold='${accuracy_limit} >> "${run_x86_log_file}"
    ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit}>> "${run_x86_log_file}"
    if [ $? = 0 ]; then
        run_result='x86: '${model_name}_weightquant' pass'; echo ${run_result} >> ${run_benchmark_result_file}
    else
        run_result='x86: '${model_name}_weightquant' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
    fi
done < ${models_tf_weightquant_config}
# Run mindir weight quantization converted models: # Run mindir weight quantization converted models:
while read line; do while read line; do
weight_quant_line_info=${line} weight_quant_line_info=${line}
@ -590,9 +625,9 @@ function Run_x86() {
echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}" echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_log_file}"
./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}" ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_log_file}"
if [ $? = 0 ]; then if [ $? = 0 ]; then
run_result='x86: '${model_name}'[weight_quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} run_result='x86: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
else else
run_result='x86: '${model_name}'[weight_quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 run_result='x86: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi fi
done < ${models_mindspore_weightquant_config} done < ${models_mindspore_weightquant_config}
@ -845,9 +880,9 @@ function Run_x86_sse() {
echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_sse_log_file}" echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_sse_log_file}"
./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}" ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}"
if [ $? = 0 ]; then if [ $? = 0 ]; then
run_result='x86_sse: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file} run_result='x86_sse: '${model_name}_weightquant' pass'; echo ${run_result} >> ${run_benchmark_result_file}
else else
run_result='x86_sse: '${model_name}' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 run_result='x86_sse: '${model_name}_weightquant' failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi fi
done < ${models_tflite_weightquant_config} done < ${models_tflite_weightquant_config}
@ -863,9 +898,9 @@ function Run_x86_sse() {
echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_sse_log_file}" echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_sse_log_file}"
./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}" ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_sse_log_file}"
if [ $? = 0 ]; then if [ $? = 0 ]; then
run_result='x86_sse: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} run_result='x86_sse: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
else else
run_result='x86_sse: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 run_result='x86_sse: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi fi
done < ${models_mindspore_weightquant_config} done < ${models_mindspore_weightquant_config}
@ -1135,9 +1170,9 @@ function Run_x86_avx() {
echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_avx_log_file}" echo './benchmark --modelFile='${ms_models_path}'/'${model_name}'.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/'${model_name}'.ms.out --accuracyThreshold=${accuracy_limit}' >> "${run_x86_avx_log_file}"
./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_avx_log_file}" ./benchmark --modelFile=${ms_models_path}/${model_name}_weightquant.ms --inDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/input/${model_name}.ms.bin --benchmarkDataFile=/home/workspace/mindspore_dataset/mslite/models/hiai/input_output/output/${model_name}.ms.out --accuracyThreshold=${accuracy_limit} >> "${run_x86_avx_log_file}"
if [ $? = 0 ]; then if [ $? = 0 ]; then
run_result='x86_avx: '${model_name}'[weight quant] pass'; echo ${run_result} >> ${run_benchmark_result_file} run_result='x86_avx: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
else else
run_result='x86_avx: '${model_name}'[weight quant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 run_result='x86_avx: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi fi
done < ${models_mindspore_weightquant_config} done < ${models_mindspore_weightquant_config}
@ -1566,9 +1601,9 @@ function Run_arm64() {
echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --loopCount=1 --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --modelFile='${model_name}'_weightquant.ms --inDataFile=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --benchmarkDataFile=/data/local/tmp/input_output/output/'${model_name}'.ms.out --loopCount=1 --accuracyThreshold='${accuracy_limit} >> adb_run_cmd.txt
adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_fp32_log_file}" adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_arm64_fp32_log_file}"
if [ $? = 0 ]; then if [ $? = 0 ]; then
run_result='arm64: '${model_name}'[weightQuant] pass'; echo ${run_result} >> ${run_benchmark_result_file} run_result='arm64: '${model_name}'_weightquant pass'; echo ${run_result} >> ${run_benchmark_result_file}
else else
run_result='arm64: '${model_name}'[weightQuant] failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1 run_result='arm64: '${model_name}'_weightquant failed'; echo ${run_result} >> ${run_benchmark_result_file}; return 1
fi fi
done < ${models_mindspore_weightquant_config} done < ${models_mindspore_weightquant_config}
@ -2147,6 +2182,7 @@ models_npu_config=${basepath}/models_npu.cfg
models_compatibility_config=${basepath}/models_compatibility.cfg models_compatibility_config=${basepath}/models_compatibility.cfg
models_with_multiple_inputs_config=${basepath}/models_with_multiple_inputs.cfg models_with_multiple_inputs_config=${basepath}/models_with_multiple_inputs.cfg
models_for_process_only_config=${basepath}/models_for_process_only.cfg models_for_process_only_config=${basepath}/models_for_process_only.cfg
models_tf_weightquant_config=${basepath}/models_tf_weightquant.cfg
ms_models_path=${basepath}/ms_models ms_models_path=${basepath}/ms_models

@ -137,6 +137,7 @@ STATUS NodeInferShape(const std::unique_ptr<schema::CNodeT> &node, const std::ve
MS_LOG(ERROR) << "parameter is nullptr."; MS_LOG(ERROR) << "parameter is nullptr.";
return RET_ERROR; return RET_ERROR;
} }
parameter->quant_type_ = node->quantType;
parameter->infer_flag_ = true; parameter->infer_flag_ = true;
auto ret = KernelInferShape(inputs, outputs, parameter); auto ret = KernelInferShape(inputs, outputs, parameter);
fbb.Clear(); fbb.Clear();

@ -947,4 +947,48 @@ void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_coun
*desired_min = min; *desired_min = min;
} }
int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first) {
auto channels = dims[0];
if (!(*channel_at_first)) {
if (dims.size() != 2) {
MS_LOG(WARNING) << "unexpected dims size: " << dims.size();
*channel_at_first = true;
} else {
channels = dims[1];
}
} else {
channels = channel_cnt == -1 ? channels : channel_cnt;
}
return channels;
}
void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
int *channel_cnt) {
if (primitive->name() == ops::kNameMatMul && static_cast<int>(shapes.size()) == 2) {
auto matmul_prim = primitive->cast<std::shared_ptr<ops::MatMul>>();
MS_ASSERT(matmul_prim != nullptr);
*channel_at_first =
index != 1 || (matmul_prim->GetAttr(ops::kTransposeB) != nullptr && matmul_prim->get_transpose_b());
} else if (primitive->name() == ops::kNameLSTM) {
if (index == 1 || index == 2) {
if (shapes.size() != 3) {
MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
} else {
*channel_cnt = shapes[0] * shapes[1];
}
} else if (index == 3) {
if (shapes.size() != 2) {
MS_LOG(WARNING) << "unexpected lstm shape size: " << shapes.size();
} else {
auto tensor_elem_cnt = shapes[0] * shapes[1];
if (tensor_elem_cnt / 4 * 4 == tensor_elem_cnt) {
*channel_cnt = 4;
}
}
} else {
MS_LOG(WARNING) << "unexpected index of lstm: " << index;
}
}
}
} // namespace mindspore::lite::quant } // namespace mindspore::lite::quant

@ -28,6 +28,7 @@
#include <limits> #include <limits>
#include <utility> #include <utility>
#include "ops/mat_mul.h" #include "ops/mat_mul.h"
#include "ops/lstm.h"
#include "ops/fusion/full_connection.h" #include "ops/fusion/full_connection.h"
#include "tools/converter/quantizer/quantizer.h" #include "tools/converter/quantizer/quantizer.h"
#include "include/errorcode.h" #include "include/errorcode.h"
@ -113,6 +114,11 @@ STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_data
void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas, void GetMaxMinPerchannel(int channels, int one_filter_size, int i, int elem_count, const float *raw_datas,
bool channel_at_first, float *desired_max, float *desired_min); bool channel_at_first, float *desired_max, float *desired_min);
int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first);
void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
int *channel_cnt);
template <typename T> template <typename T>
T QuantizeData(const float originData, const schema::QuantParamT *quantParam) { T QuantizeData(const float originData, const schema::QuantParamT *quantParam) {
MS_ASSERT(quantParam != nullptr); MS_ASSERT(quantParam != nullptr);
@ -169,19 +175,12 @@ template <typename T>
STATUS DoPerChannelQuant(const tensor::TensorPtr &weight, const QuantType &quant_type, STATUS DoPerChannelQuant(const tensor::TensorPtr &weight, const QuantType &quant_type,
std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min, std::vector<schema::QuantParamT> *quant_params, const int &quant_max, const int &quant_min,
const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas, const size_t &bit_num, const bool &k_means, std::vector<T> *quant_datas,
std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true) { std::vector<float> *dequant_datas, TypeId quant_data_type, bool channel_at_first = true,
int channel_cnt = -1) {
auto dims = weight->shape(); auto dims = weight->shape();
size_t elem_count = weight->DataSize(); size_t elem_count = weight->DataSize();
auto *raw_datas = static_cast<float *>(weight->data_c()); auto *raw_datas = static_cast<float *>(weight->data_c());
auto channels = dims[0]; auto channels = CalChannels(dims, channel_cnt, &channel_at_first);
if (!channel_at_first) {
if (dims.size() != 2) {
MS_LOG(ERROR) << "unexpected dims size: " << dims.size();
channel_at_first = true;
} else {
channels = dims[1];
}
}
if (channels == 0) { if (channels == 0) {
MS_LOG(ERROR) << "channels is zero"; MS_LOG(ERROR) << "channels is zero";
return RET_ERROR; return RET_ERROR;
@ -358,15 +357,11 @@ STATUS QuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitiv
int ret = RET_OK; int ret = RET_OK;
if (per_channel) { if (per_channel) {
bool channel_at_first = true; bool channel_at_first = true;
if (primitive->name() == ops::kNameMatMul && weight->shape().size() == 2) { int channel_cnt = -1;
auto matmul_prim = primitive->cast<std::shared_ptr<ops::MatMul>>(); CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt);
MS_ASSERT(matmul_prim != nullptr);
channel_at_first =
index != 1 || (matmul_prim->GetAttr(ops::kTransposeB) != nullptr && matmul_prim->get_transpose_b());
}
// channel at first // channel at first
ret = DoPerChannelQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data, ret = DoPerChannelQuant<T>(weight, quant_type, &quant_params, quant_max, quant_min, bit_num, k_means, &quant_data,
&dequant_datas, quant_data_type, channel_at_first); &dequant_datas, quant_data_type, channel_at_first, channel_cnt);
if (ret == RET_CONTINUE) { if (ret == RET_CONTINUE) {
return ret; return ret;
} else if (ret != RET_OK) { } else if (ret != RET_OK) {

@ -126,45 +126,14 @@ STATUS WeightQuantizer::DoConvQuantize(const CNodePtr &cnode) {
} }
STATUS WeightQuantizer::DoMulQuantize(const CNodePtr &cnode) { STATUS WeightQuantizer::DoMulQuantize(const CNodePtr &cnode) {
auto already_quant = false;
tensor::TensorPtr tensor_info = nullptr;
ParameterPtr param_node = nullptr;
int index = 0;
for (size_t i = 1; i < cnode->size(); i++) { for (size_t i = 1; i < cnode->size(); i++) {
auto inputNode = cnode->input(i); auto inputNode = cnode->input(i);
if (inputNode->isa<Parameter>()) { if (inputNode->isa<Parameter>()) {
param_node = inputNode->cast<ParameterPtr>(); auto param_node = inputNode->cast<ParameterPtr>();
if ((param_node != nullptr) && param_node->has_default()) { if ((param_node != nullptr) && param_node->has_default()) {
tensor_info = std::static_pointer_cast<tensor::Tensor>(param_node->default_param()); auto tensor_info = std::static_pointer_cast<tensor::Tensor>(param_node->default_param());
if ((tensor_info == nullptr) || (tensor_info->Size() == 0) || (tensor_info->data_c() == nullptr)) { if ((tensor_info != nullptr) && (tensor_info->data_type() == mindspore::kNumberTypeFloat32) &&
tensor_info = nullptr; (tensor_info->Size() > 0) && (tensor_info->data_c() != nullptr)) {
continue;
} else if (tensor_info->data_type() == mindspore::kNumberTypeInt8 ||
tensor_info->data_type() == mindspore::kNumberTypeInt16) {
MS_LOG(INFO) << "the node: " << cnode->fullname_with_scope() << " input_i: " << i << "has been "
<< " quantized";
already_quant = true;
break;
} else if (tensor_info->data_type() != mindspore::kNumberTypeFloat32) {
tensor_info = nullptr;
continue;
} else {
index = i;
break;
}
}
}
}
if (already_quant) {
return RET_OK;
}
if (tensor_info == nullptr) {
MS_LOG(WARNING) << cnode->fullname_with_scope() << " No valid input param node !";
return RET_OK;
}
auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0)); auto primitive = GetValueNode<PrimitivePtr>(cnode->input(0));
if (primitive == nullptr) { if (primitive == nullptr) {
MS_LOG(ERROR) << "primitive is nullptr"; MS_LOG(ERROR) << "primitive is nullptr";
@ -172,25 +141,32 @@ STATUS WeightQuantizer::DoMulQuantize(const CNodePtr &cnode) {
} }
auto status = RET_ERROR; auto status = RET_ERROR;
auto per_channel = true;
if (i == 3) {
per_channel = false;
}
if (type_id_ == kNumberTypeInt8) { if (type_id_ == kNumberTypeInt8) {
status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true, status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_,
type_id_, index - 1); bit_num_, per_channel, type_id_, i - 1);
} else if (type_id_ == kNumberTypeInt16) { } else if (type_id_ == kNumberTypeInt16) {
status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true, status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_,
type_id_, index - 1); bit_num_, per_channel, type_id_, i - 1);
} }
if (status == RET_CONTINUE) { if (status == RET_CONTINUE) {
return RET_OK; continue;
} else if (status != RET_OK) { } else if (status != RET_OK) {
MS_LOG(ERROR) << "QuantFilter failed : " << status; MS_LOG(ERROR) << cnode->fullname_with_scope() << " input " << i << " QuantFilter failed : " << status;
return status; return status;
} }
status = SetAbstract(tensor_info, param_node, primitive); status = SetAbstract(tensor_info, param_node, primitive);
if (status != RET_OK) { if (status != RET_OK) {
MS_LOG(ERROR) << "SetAbstract failed : " << status; MS_LOG(ERROR) << cnode->fullname_with_scope() << " input " << i << " SetAbstract failed : " << status;
return RET_ERROR; return RET_ERROR;
} }
}
}
}
}
return RET_OK; return RET_OK;
} }
@ -290,11 +266,11 @@ STATUS WeightQuantizer::ProcessLstmWeightByIndex(const CNodePtr &cnode, const Pr
} }
auto status = RET_ERROR; auto status = RET_ERROR;
if (type_id_ == kNumberTypeInt8) { if (type_id_ == kNumberTypeInt8) {
status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, false, status = QuantFilter<int8_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true,
type_id_, index - 1); type_id_, index - 1);
} else if (type_id_ == kNumberTypeInt16) { } else if (type_id_ == kNumberTypeInt16) {
status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, status = QuantFilter<int16_t>(tensor_info, primitive, QuantType_WeightQuant, quant_max_, quant_min_, bit_num_, true,
false, type_id_, index - 1); type_id_, index - 1);
} }
if (status == RET_CONTINUE) { if (status == RET_CONTINUE) {
return RET_OK; return RET_OK;

Loading…
Cancel
Save