From 07a4d8f8d6383db637a164c264f35cefea696c35 Mon Sep 17 00:00:00 2001 From: lidanqing Date: Thu, 15 Aug 2019 15:16:35 +0200 Subject: [PATCH] Fix mAP problem in unit test of int8 object detection test (#18946) * change the top1 comparison to mAP comparison test=develop * change the mobilenet-ssd tester demo data and batch_size settings test=develop --- .../fluid/inference/tests/api/CMakeLists.txt | 17 +++- .../analyzer_int8_object_detection_tester.cc | 4 +- .../fluid/inference/tests/api/tester_helper.h | 77 ++++++++++--------- 3 files changed, 59 insertions(+), 39 deletions(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 083e1bc59e..2310c9fbd1 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -43,6 +43,17 @@ function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir --iterations=2) endfunction() +function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME test_binary model_dir data_path) + inference_analysis_test_run(${TARGET_NAME} + COMMAND ${test_binary} + ARGS --infer_model=${model_dir}/model + --infer_data=${data_path} + --warmup_batch_size=10 + --batch_size=300 + --paddle_num_threads=${CPU_NUM_THREADS_ON_CI} + --iterations=1) +endfunction() + function(inference_analysis_api_test_with_fake_data_build TARGET_NAME filename) inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}) @@ -232,12 +243,12 @@ if(WITH_MKLDNN) inference_analysis_api_int8_test_run(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH}) ### Object detection models - set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_100.bin") + set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin") set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection") set(INT8_OBJ_DETECT_TEST_APP_SRC 
"analyzer_int8_object_detection_tester.cc") # download dataset if necessary - download_int8_data(${INT8_DATA_DIR} "pascalvoc_val_head_100.tar.gz") + download_int8_data(${INT8_DATA_DIR} "pascalvoc_val_head_300.tar.gz") # build test binary to be used in subsequent tests inference_analysis_api_int8_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC}) @@ -245,7 +256,7 @@ if(WITH_MKLDNN) # mobilenet-ssd int8 set(INT8_MOBILENET_SSD_MODEL_DIR "${INT8_DATA_DIR}/mobilenet-ssd") download_int8_data(${INT8_MOBILENET_SSD_MODEL_DIR} "mobilenet_ssd_int8_model.tar.gz" ) - inference_analysis_api_int8_test_run(test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH}) + inference_analysis_api_object_dection_int8_test_run(test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH}) endif() diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc index ccb50d4043..334fdb6ce9 100644 --- a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" +// setting iterations to 0 means processing the whole dataset namespace paddle { namespace inference { namespace analysis { @@ -273,7 +274,8 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) { q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data); q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size); - CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all); + // 0 is avg_cost, 1 is top1_acc, 2 is top5_acc or mAP + CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all, 2); } } // namespace analysis diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 61cf10c317..2d4e75b827 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -443,15 +443,21 @@ void TestPrediction(const PaddlePredictor::Config *config, } } -void SummarizeAccuracy(float avg_acc1_fp32, float avg_acc1_int8) { +void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8, + int compared_idx) { + PADDLE_ENFORCE(compared_idx <= 2 && compared_idx >= 1, + "Compare either top1 accuracy either mAP(top5), the " + "compared_idx is out of range"); + std::string prefix = (compared_idx == 1) ? "top1_accuracy " : "mAP "; LOG(INFO) << "--- Accuracy summary --- "; - LOG(INFO) << "Accepted top1 accuracy drop threshold: " - << FLAGS_quantized_accuracy - << ". (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)"; - LOG(INFO) << "FP32: avg top1 accuracy: " << std::fixed << std::setw(6) - << std::setprecision(4) << avg_acc1_fp32; - LOG(INFO) << "INT8: avg top1 accuracy: " << std::fixed << std::setw(6) - << std::setprecision(4) << avg_acc1_int8; + LOG(INFO) << "Accepted " << prefix + << "drop threshold: " << FLAGS_quantized_accuracy + << ". 
(condition: (FP32_" << prefix << " - INT8_" << prefix + << ") <= threshold)"; + LOG(INFO) << "FP32: avg " << prefix << std::fixed << std::setw(6) + << std::setprecision(4) << avg_acc_fp32; + LOG(INFO) << "INT8: avg " << prefix << std::fixed << std::setw(6) + << std::setprecision(4) << avg_acc_int8; } void SummarizePerformance(float sample_latency_fp32, @@ -468,39 +474,39 @@ void SummarizePerformance(float sample_latency_fp32, << ", avg latency: " << sample_latency_int8 << " ms"; } -void CompareTopAccuracy( +void CompareAccuracy( const std::vector> &output_slots_quant, - const std::vector> &output_slots_ref) { + const std::vector> &output_slots_ref, + int compared_idx) { if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0) throw std::invalid_argument( - "CompareTopAccuracy: output_slots vector is empty."); + "CompareAccuracy: output_slots vector is empty."); - float total_accs1_quant{0}; - float total_accs1_ref{0}; + float total_accs_quant{0}; + float total_accs_ref{0}; for (size_t i = 0; i < output_slots_quant.size(); ++i) { PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL); PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL); - // second output: acc_top1 - if (output_slots_quant[i][1].lod.size() > 0 || - output_slots_ref[i][1].lod.size() > 0) + if (output_slots_quant[i][compared_idx].lod.size() > 0 || + output_slots_ref[i][compared_idx].lod.size() > 0) + throw std::invalid_argument("CompareAccuracy: output has nonempty LoD."); + if (output_slots_quant[i][compared_idx].dtype != + paddle::PaddleDType::FLOAT32 || + output_slots_ref[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32) throw std::invalid_argument( - "CompareTopAccuracy: top1 accuracy output has nonempty LoD."); - if (output_slots_quant[i][1].dtype != paddle::PaddleDType::FLOAT32 || - output_slots_ref[i][1].dtype != paddle::PaddleDType::FLOAT32) - throw std::invalid_argument( - "CompareTopAccuracy: top1 accuracy output is of a wrong type."); - total_accs1_quant += - 
*static_cast(output_slots_quant[i][1].data.data()); - total_accs1_ref += - *static_cast(output_slots_ref[i][1].data.data()); - } - float avg_acc1_quant = total_accs1_quant / output_slots_quant.size(); - float avg_acc1_ref = total_accs1_ref / output_slots_ref.size(); - - SummarizeAccuracy(avg_acc1_ref, avg_acc1_quant); - CHECK_GT(avg_acc1_ref, 0.0); - CHECK_GT(avg_acc1_quant, 0.0); - CHECK_LE(avg_acc1_ref - avg_acc1_quant, FLAGS_quantized_accuracy); + "CompareAccuracy: output is of a wrong type."); + total_accs_quant += + *static_cast(output_slots_quant[i][compared_idx].data.data()); + total_accs_ref += + *static_cast(output_slots_ref[i][compared_idx].data.data()); + } + float avg_acc_quant = total_accs_quant / output_slots_quant.size(); + float avg_acc_ref = total_accs_ref / output_slots_ref.size(); + + SummarizeAccuracy(avg_acc_ref, avg_acc_quant, compared_idx); + CHECK_GT(avg_acc_ref, 0.0); + CHECK_GT(avg_acc_quant, 0.0); + CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy); } void CompareDeterministic( @@ -536,7 +542,8 @@ void CompareNativeAndAnalysis( void CompareQuantizedAndAnalysis( const AnalysisConfig *config, const AnalysisConfig *qconfig, - const std::vector> &inputs) { + const std::vector> &inputs, + const int compared_idx = 1) { PADDLE_ENFORCE_EQ(inputs[0][0].shape[0], FLAGS_batch_size, "Input data has to be packed batch by batch."); LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size @@ -559,7 +566,7 @@ void CompareQuantizedAndAnalysis( &sample_latency_int8); SummarizePerformance(sample_latency_fp32, sample_latency_int8); - CompareTopAccuracy(quantized_outputs, analysis_outputs); + CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx); } void CompareNativeAndAnalysis(