From 07a4d8f8d6383db637a164c264f35cefea696c35 Mon Sep 17 00:00:00 2001
From: lidanqing <danqing.li@intel.com>
Date: Thu, 15 Aug 2019 15:16:35 +0200
Subject: [PATCH] Fix mAP problem in unit test of int8 object detection test
 (#18946)

* compare mAP instead of top1 accuracy in the int8 object detection test
test=develop

* switch the mobilenet-ssd tester to the pascalvoc_val_head_300 data and batch_size=300
test=develop
---
 .../fluid/inference/tests/api/CMakeLists.txt  | 17 +++-
 .../analyzer_int8_object_detection_tester.cc  |  4 +-
 .../fluid/inference/tests/api/tester_helper.h | 77 ++++++++++---------
 3 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 083e1bc59e..2310c9fbd1 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -43,6 +43,17 @@ function(inference_analysis_api_int8_test_run TARGET_NAME test_binary model_dir
 	     --iterations=2)
 endfunction()
 
+function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME test_binary model_dir data_path)
+	inference_analysis_test_run(${TARGET_NAME}
+	COMMAND ${test_binary}
+        ARGS --infer_model=${model_dir}/model
+             --infer_data=${data_path}
+             --warmup_batch_size=10
+             --batch_size=300
+             --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
+	     --iterations=1)
+endfunction()
+
 function(inference_analysis_api_test_with_fake_data_build TARGET_NAME filename)
 	inference_analysis_test_build(${TARGET_NAME} SRCS ${filename}
         EXTRA_DEPS ${INFERENCE_EXTRA_DEPS})
@@ -232,12 +243,12 @@ if(WITH_MKLDNN)
   inference_analysis_api_int8_test_run(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
   ### Object detection models
-  set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_100.bin")
+  set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin")
   set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection")
   set(INT8_OBJ_DETECT_TEST_APP_SRC "analyzer_int8_object_detection_tester.cc")
 
   # download dataset if necessary
-  download_int8_data(${INT8_DATA_DIR} "pascalvoc_val_head_100.tar.gz")
+  download_int8_data(${INT8_DATA_DIR} "pascalvoc_val_head_300.tar.gz")
 
   # build test binary to be used in subsequent tests
   inference_analysis_api_int8_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC})
@@ -245,7 +256,7 @@ if(WITH_MKLDNN)
   # mobilenet-ssd int8
   set(INT8_MOBILENET_SSD_MODEL_DIR "${INT8_DATA_DIR}/mobilenet-ssd")
   download_int8_data(${INT8_MOBILENET_SSD_MODEL_DIR} "mobilenet_ssd_int8_model.tar.gz" )
-  inference_analysis_api_int8_test_run(test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH})
+  inference_analysis_api_object_dection_int8_test_run(test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH})
 
 endif()
 
diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
index ccb50d4043..334fdb6ce9 100644
--- a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
 
+// Setting --iterations to 0 means the whole dataset is processed.
 namespace paddle {
 namespace inference {
 namespace analysis {
@@ -273,7 +274,8 @@ TEST(Analyzer_int8_mobilenet_ssd, quantization) {
   q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
   q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);
 
-  CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all);
+  // Output index to compare: 0 is avg_cost, 1 is top1_acc, 2 is top5_acc or mAP
+  CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all, 2);
 }
 
 }  // namespace analysis
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 61cf10c317..2d4e75b827 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -443,15 +443,21 @@ void TestPrediction(const PaddlePredictor::Config *config,
   }
 }
 
-void SummarizeAccuracy(float avg_acc1_fp32, float avg_acc1_int8) {
+void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
+                       int compared_idx) {
+  PADDLE_ENFORCE(compared_idx <= 2 && compared_idx >= 1,
+                 "Compare either top1 accuracy either mAP(top5), the "
+                 "compared_idx is out of range");
+  std::string prefix = (compared_idx == 1) ? "top1_accuracy " : "mAP ";
   LOG(INFO) << "--- Accuracy summary --- ";
-  LOG(INFO) << "Accepted top1 accuracy drop threshold: "
-            << FLAGS_quantized_accuracy
-            << ". (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)";
-  LOG(INFO) << "FP32: avg top1 accuracy: " << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc1_fp32;
-  LOG(INFO) << "INT8: avg top1 accuracy: " << std::fixed << std::setw(6)
-            << std::setprecision(4) << avg_acc1_int8;
+  LOG(INFO) << "Accepted " << prefix
+            << "drop threshold: " << FLAGS_quantized_accuracy
+            << ". (condition: (FP32_" << prefix << " - INT8_" << prefix
+            << ") <= threshold)";
+  LOG(INFO) << "FP32: avg " << prefix << std::fixed << std::setw(6)
+            << std::setprecision(4) << avg_acc_fp32;
+  LOG(INFO) << "INT8: avg " << prefix << std::fixed << std::setw(6)
+            << std::setprecision(4) << avg_acc_int8;
 }
 
 void SummarizePerformance(float sample_latency_fp32,
@@ -468,39 +474,39 @@ void SummarizePerformance(float sample_latency_fp32,
             << ", avg latency: " << sample_latency_int8 << " ms";
 }
 
-void CompareTopAccuracy(
+void CompareAccuracy(
     const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
-    const std::vector<std::vector<PaddleTensor>> &output_slots_ref) {
+    const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
+    int compared_idx) {
   if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0)
     throw std::invalid_argument(
-        "CompareTopAccuracy: output_slots vector is empty.");
+        "CompareAccuracy: output_slots vector is empty.");
 
-  float total_accs1_quant{0};
-  float total_accs1_ref{0};
+  float total_accs_quant{0};
+  float total_accs_ref{0};
   for (size_t i = 0; i < output_slots_quant.size(); ++i) {
     PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL);
     PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL);
-    // second output: acc_top1
-    if (output_slots_quant[i][1].lod.size() > 0 ||
-        output_slots_ref[i][1].lod.size() > 0)
+    if (output_slots_quant[i][compared_idx].lod.size() > 0 ||
+        output_slots_ref[i][compared_idx].lod.size() > 0)
+      throw std::invalid_argument("CompareAccuracy: output has nonempty LoD.");
+    if (output_slots_quant[i][compared_idx].dtype !=
+            paddle::PaddleDType::FLOAT32 ||
+        output_slots_ref[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32)
       throw std::invalid_argument(
-          "CompareTopAccuracy: top1 accuracy output has nonempty LoD.");
-    if (output_slots_quant[i][1].dtype != paddle::PaddleDType::FLOAT32 ||
-        output_slots_ref[i][1].dtype != paddle::PaddleDType::FLOAT32)
-      throw std::invalid_argument(
-          "CompareTopAccuracy: top1 accuracy output is of a wrong type.");
-    total_accs1_quant +=
-        *static_cast<float *>(output_slots_quant[i][1].data.data());
-    total_accs1_ref +=
-        *static_cast<float *>(output_slots_ref[i][1].data.data());
-  }
-  float avg_acc1_quant = total_accs1_quant / output_slots_quant.size();
-  float avg_acc1_ref = total_accs1_ref / output_slots_ref.size();
-
-  SummarizeAccuracy(avg_acc1_ref, avg_acc1_quant);
-  CHECK_GT(avg_acc1_ref, 0.0);
-  CHECK_GT(avg_acc1_quant, 0.0);
-  CHECK_LE(avg_acc1_ref - avg_acc1_quant, FLAGS_quantized_accuracy);
+          "CompareAccuracy: output is of a wrong type.");
+    total_accs_quant +=
+        *static_cast<float *>(output_slots_quant[i][compared_idx].data.data());
+    total_accs_ref +=
+        *static_cast<float *>(output_slots_ref[i][compared_idx].data.data());
+  }
+  float avg_acc_quant = total_accs_quant / output_slots_quant.size();
+  float avg_acc_ref = total_accs_ref / output_slots_ref.size();
+
+  SummarizeAccuracy(avg_acc_ref, avg_acc_quant, compared_idx);
+  CHECK_GT(avg_acc_ref, 0.0);
+  CHECK_GT(avg_acc_quant, 0.0);
+  CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
 }
 
 void CompareDeterministic(
@@ -536,7 +542,8 @@ void CompareNativeAndAnalysis(
 
 void CompareQuantizedAndAnalysis(
     const AnalysisConfig *config, const AnalysisConfig *qconfig,
-    const std::vector<std::vector<PaddleTensor>> &inputs) {
+    const std::vector<std::vector<PaddleTensor>> &inputs,
+    const int compared_idx = 1) {
   PADDLE_ENFORCE_EQ(inputs[0][0].shape[0], FLAGS_batch_size,
                     "Input data has to be packed batch by batch.");
   LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size
@@ -559,7 +566,7 @@ void CompareQuantizedAndAnalysis(
                           &sample_latency_int8);
 
   SummarizePerformance(sample_latency_fp32, sample_latency_int8);
-  CompareTopAccuracy(quantized_outputs, analysis_outputs);
+  CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
 }
 
 void CompareNativeAndAnalysis(