@@ -51,6 +51,7 @@ DEFINE_bool(use_analysis, true,
 DEFINE_bool(record_benchmark, false,
             "Record benchmark after profiling the model");
 DEFINE_double(accuracy, 1e-3, "Result Accuracy.");
+DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch.");
 
 DECLARE_bool(profile);
 DECLARE_int32(paddle_num_threads);
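
For reference, the new flag rides on the same gflags machinery as the existing ones above; a minimal, self-contained sketch of how such a flag is parsed (the test binaries' real main() comes from the test framework, not this diff):

#include <gflags/gflags.h>

DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch.");

int main(int argc, char **argv) {
  // e.g. ./tester --zero_copy=true --repeat=100 --record_benchmark=true
  google::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  return FLAGS_zero_copy ? 0 : 1;  // FLAGS_zero_copy now holds the CLI value
}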
@@ -198,61 +199,104 @@ void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
   }
 }
 
-void TestOneThreadPrediction(
-    const PaddlePredictor::Config *config,
-    const std::vector<std::vector<PaddleTensor>> &inputs,
-    std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
-  int batch_size = FLAGS_batch_size;
-  int num_times = FLAGS_repeat;
-  auto predictor = CreateTestPredictor(config, use_analysis);
+void ConvertPaddleTensorToZeroCopyTensor(
+    PaddlePredictor *predictor, const std::vector<PaddleTensor> &inputs) {
+  for (size_t i = 0; i < inputs.size(); i++) {
+    auto input = inputs[i];
+    auto tensor = predictor->GetInputTensor(input.name);
+    tensor->Reshape(input.shape);
+    tensor->SetLoD({input.lod});
+    if (input.dtype == PaddleDType::INT64) {
+      ZeroCopyTensorAssignData<int64_t>(tensor.get(), input.data);
+    } else if (input.dtype == PaddleDType::FLOAT32) {
+      ZeroCopyTensorAssignData<float>(tensor.get(), input.data);
+    } else {
+      LOG(ERROR) << "unsupported feed type " << input.dtype;
+    }
+  }
+}
 
-  // warmup run
-  LOG(INFO) << "Warm up run...";
-  {
-    Timer warmup_timer;
-    warmup_timer.tic();
+void PredictionWarmUp(PaddlePredictor *predictor,
+                      const std::vector<std::vector<PaddleTensor>> &inputs,
+                      std::vector<PaddleTensor> *outputs, int num_threads,
+                      int tid) {
+  int batch_size = FLAGS_batch_size;
+  LOG(INFO) << "Running thread " << tid << ", warm up run...";
+  if (FLAGS_zero_copy) {
+    ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[0]);
+  }
+  Timer warmup_timer;
+  warmup_timer.tic();
+  if (!FLAGS_zero_copy) {
     predictor->Run(inputs[0], outputs, batch_size);
-    PrintTime(batch_size, 1, 1, 0, warmup_timer.toc(), 1);
-    if (FLAGS_profile) {
-      paddle::platform::ResetProfiler();
-    }
+  } else {
+    predictor->ZeroCopyRun();
   }
+  PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
+  if (FLAGS_profile) {
+    paddle::platform::ResetProfiler();
+  }
+}
 
-  LOG(INFO) << "Run " << num_times << " times...";
-  {
-    Timer run_timer;
-    run_timer.tic();
+void PredictionRun(PaddlePredictor *predictor,
+                   const std::vector<std::vector<PaddleTensor>> &inputs,
+                   std::vector<PaddleTensor> *outputs, int num_threads,
+                   int tid) {
+  int batch_size = FLAGS_batch_size;
+  int num_times = FLAGS_repeat;
+  LOG(INFO) << "Thread " << tid << " run " << num_times << " times...";
+  Timer run_timer;
+  double elapsed_time = 0;
 #ifdef WITH_GPERFTOOLS
-    ProfilerStart("paddle_inference.prof");
+  ProfilerStart("paddle_inference.prof");
 #endif
-    for (int i = 0; i < num_times; i++) {
-      for (size_t j = 0; j < inputs.size(); j++) {
-        predictor->Run(inputs[j], outputs, batch_size);
+  if (!FLAGS_zero_copy) {
+    run_timer.tic();
+    for (size_t i = 0; i < inputs.size(); i++) {
+      for (int j = 0; j < num_times; j++) {
+        predictor->Run(inputs[i], outputs, batch_size);
       }
     }
+    elapsed_time = run_timer.toc();
+  } else {
+    for (size_t i = 0; i < inputs.size(); i++) {
+      ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[i]);
+      run_timer.tic();
+      for (int j = 0; j < num_times; j++) {
+        predictor->ZeroCopyRun();
+      }
+      elapsed_time += run_timer.toc();
+    }
+  }
 #ifdef WITH_GPERFTOOLS
-    ProfilerStop();
+  ProfilerStop();
 #endif
-
-    double latency = run_timer.toc() / (num_times > 1 ? num_times : 1);
-    PrintTime(batch_size, num_times, 1, 0, latency, inputs.size());
-    if (FLAGS_record_benchmark) {
-      Benchmark benchmark;
-      benchmark.SetName(FLAGS_model_name);
-      benchmark.SetBatchSize(batch_size);
-      benchmark.SetLatency(latency);
-      benchmark.PersistToFile("benchmark_record.txt");
-    }
+  PrintTime(batch_size, num_times, num_threads, tid, elapsed_time / num_times,
+            inputs.size());
+  if (FLAGS_record_benchmark) {
+    Benchmark benchmark;
+    benchmark.SetName(FLAGS_model_name);
+    benchmark.SetBatchSize(batch_size);
+    benchmark.SetLatency(elapsed_time / num_times);
+    benchmark.PersistToFile("benchmark_record.txt");
   }
 }
 
+void TestOneThreadPrediction(
+    const PaddlePredictor::Config *config,
+    const std::vector<std::vector<PaddleTensor>> &inputs,
+    std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
+  auto predictor = CreateTestPredictor(config, use_analysis);
+  PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
+  PredictionRun(predictor.get(), inputs, outputs, 1, 0);
+}
+
 void TestMultiThreadPrediction(
     const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<PaddleTensor> *outputs, int num_threads,
     bool use_analysis = true) {
-  int batch_size = FLAGS_batch_size;
-  int num_times = FLAGS_repeat;
   std::vector<std::thread> threads;
   std::vector<std::unique_ptr<PaddlePredictor>> predictors;
   predictors.emplace_back(CreateTestPredictor(config, use_analysis));
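
For orientation before the remaining hunks: a hedged sketch of driving the refactored single-thread path with zero-copy enabled. MakeTestConfig() and SetInput() are hypothetical stand-ins for the per-model helpers each analyzer test defines; everything else appears in the diff above:

// Hypothetical driver; MakeTestConfig()/SetInput() are stand-ins, not from this diff.
std::vector<std::vector<PaddleTensor>> inputs;
SetInput(&inputs);               // fill one or more batches of PaddleTensor
std::vector<PaddleTensor> outputs;
FLAGS_zero_copy = true;          // routes through ConvertPaddleTensorToZeroCopyTensor + ZeroCopyRun
auto config = MakeTestConfig();  // owns a PaddlePredictor::Config for the model under test
TestOneThreadPrediction(config.get(), inputs, &outputs, /*use_analysis=*/true);

Note the timing asymmetry PredictionRun preserves: the regular path starts run_timer before the loops, while the zero-copy path keeps per-batch tensor conversion out of the timed region by calling run_timer.tic() only after ConvertPaddleTensorToZeroCopyTensor.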
@@ -260,7 +304,6 @@ void TestMultiThreadPrediction(
     predictors.emplace_back(predictors.front()->Clone());
   }
 
-  size_t total_time{0};
   for (int tid = 0; tid < num_threads; ++tid) {
     threads.emplace_back([&, tid]() {
       // Each thread should have local inputs and outputs.
@@ -273,34 +316,8 @@ void TestMultiThreadPrediction(
             ->SetMkldnnThreadID(static_cast<int>(tid) + 1);
       }
 #endif
-
-      // warmup run
-      LOG(INFO) << "Running thread " << tid << ", warm up run...";
-      {
-        Timer warmup_timer;
-        warmup_timer.tic();
-        predictor->Run(inputs[0], outputs, batch_size);
-        PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
-        if (FLAGS_profile) {
-          paddle::platform::ResetProfiler();
-        }
-      }
-
-      LOG(INFO) << "Thread " << tid << " run " << num_times << " times...";
-      {
-        Timer timer;
-        timer.tic();
-        for (int i = 0; i < num_times; i++) {
-          for (const auto &input : inputs) {
-            ASSERT_TRUE(predictor->Run(input, &outputs_tid));
-          }
-        }
-
-        auto time = timer.toc();
-        total_time += time;
-        PrintTime(batch_size, num_times, num_threads, tid, time / num_times,
-                  inputs.size());
-      }
+      PredictionWarmUp(predictor.get(), inputs, outputs, num_threads, tid);
+      PredictionRun(predictor.get(), inputs, outputs, num_threads, tid);
     });
   }
   for (int i = 0; i < num_threads; ++i) {
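
A matching sketch for the multi-threaded helper, under the same assumptions about the hypothetical config/input helpers:

// Hypothetical driver for the multi-threaded path; helpers as above.
std::vector<std::vector<PaddleTensor>> inputs;
SetInput(&inputs);
std::vector<PaddleTensor> outputs;
auto config = MakeTestConfig();
// Worker 0 gets a fresh predictor and the rest clone it; each thread warms up
// once, then runs FLAGS_repeat passes and reports per-thread latency via PrintTime.
TestMultiThreadPrediction(config.get(), inputs, &outputs,
                          /*num_threads=*/4, /*use_analysis=*/true);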