|
|
|
@ -65,6 +65,8 @@ DECLARE_int32(paddle_num_threads);
|
|
|
|
|
namespace paddle {
|
|
|
|
|
namespace inference {
|
|
|
|
|
|
|
|
|
|
using paddle::framework::proto::VarType;
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
constexpr paddle::PaddleDType GetPaddleDType();
|
|
|
|
|
|
|
|
|
@ -293,7 +295,8 @@ void ConvertPaddleTensorToZeroCopyTensor(
|
|
|
|
|
void PredictionWarmUp(PaddlePredictor *predictor,
|
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &inputs,
|
|
|
|
|
std::vector<std::vector<PaddleTensor>> *outputs,
|
|
|
|
|
int num_threads, int tid) {
|
|
|
|
|
int num_threads, int tid,
|
|
|
|
|
const VarType::Type data_type = VarType::FP32) {
|
|
|
|
|
int batch_size = FLAGS_batch_size;
|
|
|
|
|
LOG(INFO) << "Running thread " << tid << ", warm up run...";
|
|
|
|
|
if (FLAGS_zero_copy) {
|
|
|
|
@ -307,7 +310,7 @@ void PredictionWarmUp(PaddlePredictor *predictor,
|
|
|
|
|
} else {
|
|
|
|
|
predictor->ZeroCopyRun();
|
|
|
|
|
}
|
|
|
|
|
PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
|
|
|
|
|
PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1, data_type);
|
|
|
|
|
if (FLAGS_profile) {
|
|
|
|
|
paddle::platform::ResetProfiler();
|
|
|
|
|
}
|
|
|
|
@ -316,7 +319,8 @@ void PredictionWarmUp(PaddlePredictor *predictor,
|
|
|
|
|
void PredictionRun(PaddlePredictor *predictor,
|
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &inputs,
|
|
|
|
|
std::vector<std::vector<PaddleTensor>> *outputs,
|
|
|
|
|
int num_threads, int tid) {
|
|
|
|
|
int num_threads, int tid,
|
|
|
|
|
const VarType::Type data_type = VarType::FP32) {
|
|
|
|
|
int num_times = FLAGS_repeat;
|
|
|
|
|
int iterations = inputs.size(); // process the whole dataset ...
|
|
|
|
|
if (FLAGS_iterations > 0 &&
|
|
|
|
@ -355,7 +359,7 @@ void PredictionRun(PaddlePredictor *predictor,
|
|
|
|
|
|
|
|
|
|
auto batch_latency = elapsed_time / (iterations * num_times);
|
|
|
|
|
PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency,
|
|
|
|
|
iterations);
|
|
|
|
|
iterations, data_type);
|
|
|
|
|
if (FLAGS_record_benchmark) {
|
|
|
|
|
Benchmark benchmark;
|
|
|
|
|
benchmark.SetName(FLAGS_model_name);
|
|
|
|
@ -368,12 +372,13 @@ void PredictionRun(PaddlePredictor *predictor,
|
|
|
|
|
void TestOneThreadPrediction(
|
|
|
|
|
const PaddlePredictor::Config *config,
|
|
|
|
|
const std::vector<std::vector<PaddleTensor>> &inputs,
|
|
|
|
|
std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
|
|
|
|
|
std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true,
|
|
|
|
|
const VarType::Type data_type = VarType::FP32) {
|
|
|
|
|
auto predictor = CreateTestPredictor(config, use_analysis);
|
|
|
|
|
if (FLAGS_warmup) {
|
|
|
|
|
PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
|
|
|
|
|
PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0, data_type);
|
|
|
|
|
}
|
|
|
|
|
PredictionRun(predictor.get(), inputs, outputs, 1, 0);
|
|
|
|
|
PredictionRun(predictor.get(), inputs, outputs, 1, 0, data_type);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void TestMultiThreadPrediction(
|
|
|
|
@ -505,13 +510,14 @@ void CompareQuantizedAndAnalysis(
|
|
|
|
|
auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
|
|
|
|
|
PrintConfig(cfg, true);
|
|
|
|
|
std::vector<std::vector<PaddleTensor>> analysis_outputs;
|
|
|
|
|
TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true);
|
|
|
|
|
TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32);
|
|
|
|
|
|
|
|
|
|
LOG(INFO) << "--- INT8 prediction start ---";
|
|
|
|
|
auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
|
|
|
|
|
PrintConfig(qcfg, true);
|
|
|
|
|
std::vector<std::vector<PaddleTensor>> quantized_outputs;
|
|
|
|
|
TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true);
|
|
|
|
|
TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
|
|
|
|
|
VarType::INT8);
|
|
|
|
|
|
|
|
|
|
LOG(INFO) << "--- comparing outputs --- ";
|
|
|
|
|
CompareTopAccuracy(quantized_outputs, analysis_outputs);
|
|
|
|
@ -640,7 +646,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < a_size; i++) {
|
|
|
|
|
if (a.type() == framework::proto::VarType::FP32) {
|
|
|
|
|
if (a.type() == VarType::FP32) {
|
|
|
|
|
const auto *a_data = a.data<float>();
|
|
|
|
|
const auto *b_data = b.data<float>();
|
|
|
|
|
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
|
|
|
|
@ -649,7 +655,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
|
|
|
|
|
b_data[i]);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
} else if (a.type() == framework::proto::VarType::INT64) {
|
|
|
|
|
} else if (a.type() == VarType::INT64) {
|
|
|
|
|
const auto *a_data = a.data<int64_t>();
|
|
|
|
|
const auto *b_data = b.data<int64_t>();
|
|
|
|
|
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
|
|
|
|
|