|
|
@ -443,15 +443,21 @@ void TestPrediction(const PaddlePredictor::Config *config,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void SummarizeAccuracy(float avg_acc1_fp32, float avg_acc1_int8) {
|
|
|
|
void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
|
|
|
|
|
|
|
|
int compared_idx) {
|
|
|
|
|
|
|
|
PADDLE_ENFORCE(compared_idx <= 2 && compared_idx >= 1,
|
|
|
|
|
|
|
|
"Compare either top1 accuracy either mAP(top5), the "
|
|
|
|
|
|
|
|
"compared_idx is out of range");
|
|
|
|
|
|
|
|
std::string prefix = (compared_idx == 1) ? "top1_accuracy " : "mAP ";
|
|
|
|
LOG(INFO) << "--- Accuracy summary --- ";
|
|
|
|
LOG(INFO) << "--- Accuracy summary --- ";
|
|
|
|
LOG(INFO) << "Accepted top1 accuracy drop threshold: "
|
|
|
|
LOG(INFO) << "Accepted " << prefix
|
|
|
|
<< FLAGS_quantized_accuracy
|
|
|
|
<< "drop threshold: " << FLAGS_quantized_accuracy
|
|
|
|
<< ". (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)";
|
|
|
|
<< ". (condition: (FP32_" << prefix << " - INT8_" << prefix
|
|
|
|
LOG(INFO) << "FP32: avg top1 accuracy: " << std::fixed << std::setw(6)
|
|
|
|
<< ") <= threshold)";
|
|
|
|
<< std::setprecision(4) << avg_acc1_fp32;
|
|
|
|
LOG(INFO) << "FP32: avg " << prefix << std::fixed << std::setw(6)
|
|
|
|
LOG(INFO) << "INT8: avg top1 accuracy: " << std::fixed << std::setw(6)
|
|
|
|
<< std::setprecision(4) << avg_acc_fp32;
|
|
|
|
<< std::setprecision(4) << avg_acc1_int8;
|
|
|
|
LOG(INFO) << "INT8: avg " << prefix << std::fixed << std::setw(6)
|
|
|
|
|
|
|
|
<< std::setprecision(4) << avg_acc_int8;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void SummarizePerformance(float sample_latency_fp32,
|
|
|
|
void SummarizePerformance(float sample_latency_fp32,
|
|
|
@ -468,39 +474,39 @@ void SummarizePerformance(float sample_latency_fp32,
|
|
|
|
<< ", avg latency: " << sample_latency_int8 << " ms";
|
|
|
|
<< ", avg latency: " << sample_latency_int8 << " ms";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void CompareTopAccuracy(
|
|
|
|
void CompareAccuracy(
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &output_slots_ref) {
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
|
|
|
|
|
|
|
|
int compared_idx) {
|
|
|
|
if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0)
|
|
|
|
if (output_slots_quant.size() == 0 || output_slots_ref.size() == 0)
|
|
|
|
throw std::invalid_argument(
|
|
|
|
throw std::invalid_argument(
|
|
|
|
"CompareTopAccuracy: output_slots vector is empty.");
|
|
|
|
"CompareAccuracy: output_slots vector is empty.");
|
|
|
|
|
|
|
|
|
|
|
|
float total_accs1_quant{0};
|
|
|
|
float total_accs_quant{0};
|
|
|
|
float total_accs1_ref{0};
|
|
|
|
float total_accs_ref{0};
|
|
|
|
for (size_t i = 0; i < output_slots_quant.size(); ++i) {
|
|
|
|
for (size_t i = 0; i < output_slots_quant.size(); ++i) {
|
|
|
|
PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL);
|
|
|
|
PADDLE_ENFORCE(output_slots_quant[i].size() >= 2UL);
|
|
|
|
PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL);
|
|
|
|
PADDLE_ENFORCE(output_slots_ref[i].size() >= 2UL);
|
|
|
|
// second output: acc_top1
|
|
|
|
if (output_slots_quant[i][compared_idx].lod.size() > 0 ||
|
|
|
|
if (output_slots_quant[i][1].lod.size() > 0 ||
|
|
|
|
output_slots_ref[i][compared_idx].lod.size() > 0)
|
|
|
|
output_slots_ref[i][1].lod.size() > 0)
|
|
|
|
throw std::invalid_argument("CompareAccuracy: output has nonempty LoD.");
|
|
|
|
|
|
|
|
if (output_slots_quant[i][compared_idx].dtype !=
|
|
|
|
|
|
|
|
paddle::PaddleDType::FLOAT32 ||
|
|
|
|
|
|
|
|
output_slots_ref[i][compared_idx].dtype != paddle::PaddleDType::FLOAT32)
|
|
|
|
throw std::invalid_argument(
|
|
|
|
throw std::invalid_argument(
|
|
|
|
"CompareTopAccuracy: top1 accuracy output has nonempty LoD.");
|
|
|
|
"CompareAccuracy: output is of a wrong type.");
|
|
|
|
if (output_slots_quant[i][1].dtype != paddle::PaddleDType::FLOAT32 ||
|
|
|
|
total_accs_quant +=
|
|
|
|
output_slots_ref[i][1].dtype != paddle::PaddleDType::FLOAT32)
|
|
|
|
*static_cast<float *>(output_slots_quant[i][compared_idx].data.data());
|
|
|
|
throw std::invalid_argument(
|
|
|
|
total_accs_ref +=
|
|
|
|
"CompareTopAccuracy: top1 accuracy output is of a wrong type.");
|
|
|
|
*static_cast<float *>(output_slots_ref[i][compared_idx].data.data());
|
|
|
|
total_accs1_quant +=
|
|
|
|
}
|
|
|
|
*static_cast<float *>(output_slots_quant[i][1].data.data());
|
|
|
|
float avg_acc_quant = total_accs_quant / output_slots_quant.size();
|
|
|
|
total_accs1_ref +=
|
|
|
|
float avg_acc_ref = total_accs_ref / output_slots_ref.size();
|
|
|
|
*static_cast<float *>(output_slots_ref[i][1].data.data());
|
|
|
|
|
|
|
|
}
|
|
|
|
SummarizeAccuracy(avg_acc_ref, avg_acc_quant, compared_idx);
|
|
|
|
float avg_acc1_quant = total_accs1_quant / output_slots_quant.size();
|
|
|
|
CHECK_GT(avg_acc_ref, 0.0);
|
|
|
|
float avg_acc1_ref = total_accs1_ref / output_slots_ref.size();
|
|
|
|
CHECK_GT(avg_acc_quant, 0.0);
|
|
|
|
|
|
|
|
CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
|
|
|
|
SummarizeAccuracy(avg_acc1_ref, avg_acc1_quant);
|
|
|
|
|
|
|
|
CHECK_GT(avg_acc1_ref, 0.0);
|
|
|
|
|
|
|
|
CHECK_GT(avg_acc1_quant, 0.0);
|
|
|
|
|
|
|
|
CHECK_LE(avg_acc1_ref - avg_acc1_quant, FLAGS_quantized_accuracy);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void CompareDeterministic(
|
|
|
|
void CompareDeterministic(
|
|
|
@ -536,7 +542,8 @@ void CompareNativeAndAnalysis(
|
|
|
|
|
|
|
|
|
|
|
|
void CompareQuantizedAndAnalysis(
|
|
|
|
void CompareQuantizedAndAnalysis(
|
|
|
|
const AnalysisConfig *config, const AnalysisConfig *qconfig,
|
|
|
|
const AnalysisConfig *config, const AnalysisConfig *qconfig,
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &inputs) {
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &inputs,
|
|
|
|
|
|
|
|
const int compared_idx = 1) {
|
|
|
|
PADDLE_ENFORCE_EQ(inputs[0][0].shape[0], FLAGS_batch_size,
|
|
|
|
PADDLE_ENFORCE_EQ(inputs[0][0].shape[0], FLAGS_batch_size,
|
|
|
|
"Input data has to be packed batch by batch.");
|
|
|
|
"Input data has to be packed batch by batch.");
|
|
|
|
LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size
|
|
|
|
LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size
|
|
|
@ -559,7 +566,7 @@ void CompareQuantizedAndAnalysis(
|
|
|
|
&sample_latency_int8);
|
|
|
|
&sample_latency_int8);
|
|
|
|
|
|
|
|
|
|
|
|
SummarizePerformance(sample_latency_fp32, sample_latency_int8);
|
|
|
|
SummarizePerformance(sample_latency_fp32, sample_latency_int8);
|
|
|
|
CompareTopAccuracy(quantized_outputs, analysis_outputs);
|
|
|
|
CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void CompareNativeAndAnalysis(
|
|
|
|
void CompareNativeAndAnalysis(
|
|
|
|