From 98fb8e58fd4fb91423d414d67f2a2684b6841020 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Fri, 25 May 2018 11:57:44 +0800
Subject: [PATCH 01/19] test infer nlp

---
 paddle/fluid/inference/io.cc                  |  2 +-
 .../fluid/inference/tests/book/CMakeLists.txt |  1 +
 .../tests/book/test_inference_nlp.cc          | 85 +++++++++++++++++++
 paddle/fluid/inference/tests/test_helper.h    |  3 +
 4 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 paddle/fluid/inference/tests/book/test_inference_nlp.cc

diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc
index 65db7c7b50..98780b6881 100644
--- a/paddle/fluid/inference/io.cc
+++ b/paddle/fluid/inference/io.cc
@@ -117,7 +117,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
   std::string program_desc_str;
   VLOG(3) << "loading model from " << model_filename;
   ReadBinaryFile(model_filename, &program_desc_str);
-
+  // LOG(INFO) << program_desc_str;
   std::unique_ptr<framework::ProgramDesc> main_program(
       new framework::ProgramDesc(program_desc_str));
 
diff --git a/paddle/fluid/inference/tests/book/CMakeLists.txt b/paddle/fluid/inference/tests/book/CMakeLists.txt
index dbb81462b8..90357f99d1 100644
--- a/paddle/fluid/inference/tests/book/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/book/CMakeLists.txt
@@ -35,6 +35,7 @@ inference_test(image_classification ARGS vgg resnet)
 inference_test(label_semantic_roles)
 inference_test(recognize_digits ARGS mlp conv)
 inference_test(recommender_system)
+inference_test(nlp)
 #inference_test(rnn_encoder_decoder)
 #inference_test(understand_sentiment ARGS conv)
 inference_test(word2vec)
diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
new file mode 100644
index 0000000000..0d6d0adfb2
--- /dev/null
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -0,0 +1,85 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "gflags/gflags.h"
+#include "gtest/gtest.h"
+#include "paddle/fluid/inference/tests/test_helper.h"
+
+DEFINE_string(dirname, "", "Directory of the inference model.");
+
+TEST(inference, understand_sentiment) {
+  if (FLAGS_dirname.empty()) {
+    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
+  }
+
+  LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl;
+  std::string dirname = FLAGS_dirname;
+
+  // 0. 
Call `paddle::framework::InitDevices()` initialize all the devices + // In unittests, this is done in paddle/testing/paddle_gtest_main.cc + paddle::framework::LoDTensor words; + + paddle::framework::LoD lod{{0, 83}}; + int64_t word_dict_len = 198392; + SetupLoDTensor(&words, lod, static_cast(0), + static_cast(word_dict_len - 1)); + /* + std::vector srcdata{ + 784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, 1550, + 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, 396, 6463, + 6463, 1550, 198391, 6463, 22564, 1612, 291, 68, 164, 784, + 784, 1550, 198391, 6463, 13659, 3362, 42468, 6189, 2209, + 198391, + 6463, 2209, 2209, 198391, 6463, 2209, 1062, 3029, 1831, 3029, + 1065, 2281, 100, 11216, 1110, 56, 10869, 9811, 100, + 198391, + 6463, 100, 9280, 100, 288, 40031, 1680, 1335, 100, 1550, + 9280, 7265, 244, 1550, 198391, 6463, 1550, 198391, 6463, + 42468, + 4376, 10251, 10760}; + paddle::framework::LoD lod{{0, srcdata.size()}}; + words.set_lod(lod); + int64_t* pdata = + words.mutable_data({static_cast(srcdata.size()), 1}, + paddle::platform::CPUPlace()); + memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); + */ + LOG(INFO) << "number of input size:" << words.numel(); + std::vector cpu_feeds; + cpu_feeds.push_back(&words); + + paddle::framework::LoDTensor output1; + std::vector cpu_fetchs1; + cpu_fetchs1.push_back(&output1); + + int repeat = 100; + // Run inference on CPU + TestInference(dirname, cpu_feeds, + cpu_fetchs1, repeat); + LOG(INFO) << output1.lod(); + LOG(INFO) << output1.dims(); + +#ifdef PADDLE_WITH_CUDA + paddle::framework::LoDTensor output2; + std::vector cpu_fetchs2; + cpu_fetchs2.push_back(&output2); + + // Run inference on CUDA GPU + TestInference(dirname, cpu_feeds, cpu_fetchs2); + LOG(INFO) << output2.lod(); + LOG(INFO) << output2.dims(); + + CheckError(output1, output2); +#endif +} diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index 01b8dc0be6..1f5551567c 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -182,6 +182,9 @@ void TestInference(const std::string& dirname, "init_program", paddle::platform::DeviceContextPool::Instance().Get(place)); inference_program = InitProgram(&executor, scope, dirname, is_combined); + // std::string binary_str; + // inference_program->Proto()->SerializeToString(&binary_str); + // LOG(INFO) << binary_str; if (use_mkldnn) { EnableMKLDNN(inference_program); } From 602e28bf1c30cd72e7378d6dc1071423086bdc73 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 25 May 2018 14:38:01 +0800 Subject: [PATCH 02/19] use the actual data --- .../tests/book/test_inference_nlp.cc | 48 +++++++++---------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 0d6d0adfb2..27bdd5528e 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include +#include #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" @@ -29,32 +31,28 @@ TEST(inference, understand_sentiment) { // 0. 
Call `paddle::framework::InitDevices()` initialize all the devices // In unittests, this is done in paddle/testing/paddle_gtest_main.cc paddle::framework::LoDTensor words; - - paddle::framework::LoD lod{{0, 83}}; - int64_t word_dict_len = 198392; - SetupLoDTensor(&words, lod, static_cast(0), - static_cast(word_dict_len - 1)); /* - std::vector srcdata{ - 784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, 1550, - 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, 396, 6463, - 6463, 1550, 198391, 6463, 22564, 1612, 291, 68, 164, 784, - 784, 1550, 198391, 6463, 13659, 3362, 42468, 6189, 2209, - 198391, - 6463, 2209, 2209, 198391, 6463, 2209, 1062, 3029, 1831, 3029, - 1065, 2281, 100, 11216, 1110, 56, 10869, 9811, 100, - 198391, - 6463, 100, 9280, 100, 288, 40031, 1680, 1335, 100, 1550, - 9280, 7265, 244, 1550, 198391, 6463, 1550, 198391, 6463, - 42468, - 4376, 10251, 10760}; - paddle::framework::LoD lod{{0, srcdata.size()}}; - words.set_lod(lod); - int64_t* pdata = - words.mutable_data({static_cast(srcdata.size()), 1}, - paddle::platform::CPUPlace()); - memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); - */ + paddle::framework::LoD lod{{0, 83}}; + int64_t word_dict_len = 198392; + SetupLoDTensor(&words, lod, static_cast(0), + static_cast(word_dict_len - 1)); + */ + std::vector srcdata{ + 784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, 1550, + 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, 396, 6463, + 6463, 1550, 198391, 6463, 22564, 1612, 291, 68, 164, 784, + 784, 1550, 198391, 6463, 13659, 3362, 42468, 6189, 2209, 198391, + 6463, 2209, 2209, 198391, 6463, 2209, 1062, 3029, 1831, 3029, + 1065, 2281, 100, 11216, 1110, 56, 10869, 9811, 100, 198391, + 6463, 100, 9280, 100, 288, 40031, 1680, 1335, 100, 1550, + 9280, 7265, 244, 1550, 198391, 6463, 1550, 198391, 6463, 42468, + 4376, 10251, 10760}; + paddle::framework::LoD lod{{0, srcdata.size()}}; + words.set_lod(lod); + int64_t* pdata = words.mutable_data( + {static_cast(srcdata.size()), 1}, paddle::platform::CPUPlace()); + memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); + LOG(INFO) << "number of input size:" << words.numel(); std::vector cpu_feeds; cpu_feeds.push_back(&words); From ce20dfa236a0bf874d8580a7861b7a85dffdf74c Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 25 May 2018 15:17:06 +0800 Subject: [PATCH 03/19] enable more choices --- .../tests/book/test_inference_nlp.cc | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 27bdd5528e..c942b43f17 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -19,6 +19,10 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tests/test_helper.h" DEFINE_string(dirname, "", "Directory of the inference model."); +DEFINE_int32(repeat, 100, "Running the inference program repeat times"); +DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference"); +DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); +DEFINE_bool(prepare_context, true, "Prepare Context before executor"); TEST(inference, understand_sentiment) { if (FLAGS_dirname.empty()) { @@ -61,10 +65,29 @@ TEST(inference, understand_sentiment) { std::vector cpu_fetchs1; cpu_fetchs1.push_back(&output1); - int repeat = 100; // Run inference on CPU - TestInference(dirname, cpu_feeds, - cpu_fetchs1, repeat); + const bool model_combined = false; + if (FLAGS_prepare_vars) { + if (FLAGS_prepare_context) { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } else { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } + } else { + if (FLAGS_prepare_context) { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } else { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } + } LOG(INFO) << output1.lod(); LOG(INFO) << output1.dims(); From 400f5e7c3ce21ba63bee62a599a82c4a0bbc299d Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 25 May 2018 21:08:49 +0800 Subject: [PATCH 04/19] add threads test --- .../tests/book/test_inference_nlp.cc | 135 +++++++++--------- 1 file changed, 67 insertions(+), 68 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index c942b43f17..ca02e38ede 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include +#include // NOLINT #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" @@ -31,76 +32,74 @@ TEST(inference, understand_sentiment) { LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl; std::string dirname = FLAGS_dirname; - - // 0. 
Call `paddle::framework::InitDevices()` initialize all the devices - // In unittests, this is done in paddle/testing/paddle_gtest_main.cc - paddle::framework::LoDTensor words; - /* - paddle::framework::LoD lod{{0, 83}}; - int64_t word_dict_len = 198392; - SetupLoDTensor(&words, lod, static_cast(0), - static_cast(word_dict_len - 1)); - */ - std::vector srcdata{ - 784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, 1550, - 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, 396, 6463, - 6463, 1550, 198391, 6463, 22564, 1612, 291, 68, 164, 784, - 784, 1550, 198391, 6463, 13659, 3362, 42468, 6189, 2209, 198391, - 6463, 2209, 2209, 198391, 6463, 2209, 1062, 3029, 1831, 3029, - 1065, 2281, 100, 11216, 1110, 56, 10869, 9811, 100, 198391, - 6463, 100, 9280, 100, 288, 40031, 1680, 1335, 100, 1550, - 9280, 7265, 244, 1550, 198391, 6463, 1550, 198391, 6463, 42468, - 4376, 10251, 10760}; - paddle::framework::LoD lod{{0, srcdata.size()}}; - words.set_lod(lod); - int64_t* pdata = words.mutable_data( - {static_cast(srcdata.size()), 1}, paddle::platform::CPUPlace()); - memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); - - LOG(INFO) << "number of input size:" << words.numel(); - std::vector cpu_feeds; - cpu_feeds.push_back(&words); - - paddle::framework::LoDTensor output1; - std::vector cpu_fetchs1; - cpu_fetchs1.push_back(&output1); - - // Run inference on CPU const bool model_combined = false; - if (FLAGS_prepare_vars) { - if (FLAGS_prepare_context) { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } else { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } - } else { - if (FLAGS_prepare_context) { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } else { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } - } - LOG(INFO) << output1.lod(); - LOG(INFO) << output1.dims(); + int total_work = 100; + int num_threads = 10; + int work_per_thread = total_work / num_threads; + std::vector> infer_threads; + for (int i = 0; i < num_threads; ++i) { + infer_threads.emplace_back(new std::thread([&, i]() { + for (int j = 0; j < work_per_thread; ++j) { + // 0. 
Call `paddle::framework::InitDevices()` initialize all the devices + // In unittests, this is done in paddle/testing/paddle_gtest_main.cc + paddle::framework::LoDTensor words; + /* + paddle::framework::LoD lod{{0, 83}}; + int64_t word_dict_len = 198392; + SetupLoDTensor(&words, lod, static_cast(0), + static_cast(word_dict_len - 1)); + */ + std::vector srcdata{ + 784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, + 1550, 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, + 396, 6463, 6463, 1550, 198391, 6463, 22564, 1612, 291, + 68, 164, 784, 784, 1550, 198391, 6463, 13659, 3362, + 42468, 6189, 2209, 198391, 6463, 2209, 2209, 198391, 6463, + 2209, 1062, 3029, 1831, 3029, 1065, 2281, 100, 11216, + 1110, 56, 10869, 9811, 100, 198391, 6463, 100, 9280, + 100, 288, 40031, 1680, 1335, 100, 1550, 9280, 7265, + 244, 1550, 198391, 6463, 1550, 198391, 6463, 42468, 4376, + 10251, 10760}; + paddle::framework::LoD lod{{0, srcdata.size()}}; + words.set_lod(lod); + int64_t* pdata = words.mutable_data( + {static_cast(srcdata.size()), 1}, + paddle::platform::CPUPlace()); + memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); -#ifdef PADDLE_WITH_CUDA - paddle::framework::LoDTensor output2; - std::vector cpu_fetchs2; - cpu_fetchs2.push_back(&output2); + LOG(INFO) << "number of input size:" << words.numel(); + std::vector cpu_feeds; + cpu_feeds.push_back(&words); - // Run inference on CUDA GPU - TestInference(dirname, cpu_feeds, cpu_fetchs2); - LOG(INFO) << output2.lod(); - LOG(INFO) << output2.dims(); + paddle::framework::LoDTensor output1; + std::vector cpu_fetchs1; + cpu_fetchs1.push_back(&output1); - CheckError(output1, output2); -#endif + // Run inference on CPU + if (FLAGS_prepare_vars) { + if (FLAGS_prepare_context) { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } else { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } + } else { + if (FLAGS_prepare_context) { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } else { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } + } + LOG(INFO) << output1.lod(); + LOG(INFO) << output1.dims(); + } + })); + } } From c00843f4e8860d7abff0077168942fa99ef37154 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 29 May 2018 17:43:01 +0800 Subject: [PATCH 05/19] enable multi-threads --- .../tests/book/test_inference_nlp.cc | 12 ++++++++ paddle/fluid/inference/tests/test_helper.h | 29 ------------------- 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index ca02e38ede..6ff8a18cdb 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -25,6 +25,12 @@ DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_bool(prepare_context, true, "Prepare Context before executor"); +inline double get_current_ms() { + struct timeval time; + gettimeofday(&time, NULL); + return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec; +} + TEST(inference, understand_sentiment) { if (FLAGS_dirname.empty()) { LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model"; @@ -102,4 +108,10 @@ TEST(inference, understand_sentiment) { } })); } + auto start_ms = get_current_ms(); + for 
(int i = 0; i < num_threads; ++i) { + infer_threads[i]->join(); + } + auto stop_ms = get_current_ms(); + LOG(INFO) << "total: " << stop_ms - start_ms << " ms"; } diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index 1f5551567c..dd3a7a584a 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -156,27 +156,10 @@ void TestInference(const std::string& dirname, auto executor = paddle::framework::Executor(place); auto* scope = new paddle::framework::Scope(); - // Profile the performance - paddle::platform::ProfilerState state; - if (paddle::platform::is_cpu_place(place)) { - state = paddle::platform::ProfilerState::kCPU; - } else { -#ifdef PADDLE_WITH_CUDA - state = paddle::platform::ProfilerState::kAll; - // The default device_id of paddle::platform::CUDAPlace is 0. - // Users can get the device_id using: - // int device_id = place.GetDeviceId(); - paddle::platform::SetDeviceId(0); -#else - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); -#endif - } - // 2. Initialize the inference_program and load parameters std::unique_ptr inference_program; // Enable the profiler - paddle::platform::EnableProfiler(state); { paddle::platform::RecordEvent record_event( "init_program", @@ -189,10 +172,6 @@ void TestInference(const std::string& dirname, EnableMKLDNN(inference_program); } } - // Disable the profiler and print the timing information - paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault, - "load_program_profiler"); - paddle::platform::ResetProfiler(); // 3. Get the feed_target_names and fetch_target_names const std::vector& feed_target_names = @@ -233,9 +212,6 @@ void TestInference(const std::string& dirname, true, CreateVars); } - // Enable the profiler - paddle::platform::EnableProfiler(state); - // Run repeat times to profile the performance for (int i = 0; i < repeat; ++i) { paddle::platform::RecordEvent record_event( @@ -252,11 +228,6 @@ void TestInference(const std::string& dirname, CreateVars); } } - - // Disable the profiler and print the timing information - paddle::platform::DisableProfiler( - paddle::platform::EventSortingKey::kDefault, "run_inference_profiler"); - paddle::platform::ResetProfiler(); } delete scope; From 77599415ba1b93715fa0626e147865c088970ee6 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 30 May 2018 12:15:10 +0800 Subject: [PATCH 06/19] enable read dataset --- .../tests/book/test_inference_nlp.cc | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 6ff8a18cdb..95cdeb4ad1 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -14,7 +14,12 @@ limitations under the License. 
*/ #include #include +#include +#include +#include +#include #include // NOLINT +#include #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" @@ -31,16 +36,37 @@ inline double get_current_ms() { return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec; } +void read_data( + std::vector>* out, + const std::string& filename = "/home/tangjian/paddle-tj/out.ids.txt") { + using namespace std; // NOLINT + fstream fin(filename); + string line; + out->clear(); + while (getline(fin, line)) { + istringstream iss(line); + vector ids; + string field; + while (getline(iss, field, ' ')) { + ids.push_back(stoi(field)); + } + out->push_back(ids); + } +} + TEST(inference, understand_sentiment) { if (FLAGS_dirname.empty()) { LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model"; } - + std::vector> inputdatas; + read_data(&inputdatas); + LOG(INFO) << "---------- dataset size: " << inputdatas.size(); LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl; std::string dirname = FLAGS_dirname; + const bool model_combined = false; - int total_work = 100; - int num_threads = 10; + int total_work = 10; + int num_threads = 2; int work_per_thread = total_work / num_threads; std::vector> infer_threads; for (int i = 0; i < num_threads; ++i) { From 4d11c8e9c64f65b6701edb1ba44cefdff0423acb Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 31 May 2018 15:11:46 +0800 Subject: [PATCH 07/19] retest single thread --- .../tests/book/test_inference_nlp.cc | 224 +++++++++++------- 1 file changed, 143 insertions(+), 81 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 95cdeb4ad1..e216e9dbe6 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -30,16 +30,19 @@ DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_bool(prepare_context, true, "Prepare Context before executor"); +DEFINE_int32(num_threads, 1, "Number of threads should be used"); + inline double get_current_ms() { struct timeval time; gettimeofday(&time, NULL); return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec; } -void read_data( - std::vector>* out, - const std::string& filename = "/home/tangjian/paddle-tj/out.ids.txt") { +// return size of total words +size_t read_datasets(std::vector* out, + const std::string& filename) { using namespace std; // NOLINT + size_t sz = 0; fstream fin(filename); string line; out->clear(); @@ -50,94 +53,153 @@ void read_data( while (getline(iss, field, ' ')) { ids.push_back(stoi(field)); } - out->push_back(ids); + if (ids.size() >= 1024 || out->size() >= 100) { + continue; + } + + paddle::framework::LoDTensor words; + paddle::framework::LoD lod{{0, ids.size()}}; + words.set_lod(lod); + int64_t* pdata = words.mutable_data( + {static_cast(ids.size()), 1}, paddle::platform::CPUPlace()); + memcpy(pdata, ids.data(), words.numel() * sizeof(int64_t)); + out->emplace_back(words); + sz += ids.size(); } + return sz; +} + +void test_multi_threads() { + /* + size_t jobs_per_thread = std::min(inputdatas.size() / FLAGS_num_threads, + inputdatas.size()); + std::vector workers(FLAGS_num_threads, jobs_per_thread); + workers[FLAGS_num_threads - 1] += inputdatas.size() % FLAGS_num_threads; + + std::vector> infer_threads; + + for (size_t i = 0; i < workers.size(); ++i) { + infer_threads.emplace_back(new std::thread([&, i]() { + size_t 
start = i * jobs_per_thread; + for (size_t j = start; j < start + workers[i]; ++j ) { + // 0. Call `paddle::framework::InitDevices()` initialize all the + devices + // In unittests, this is done in paddle/testing/paddle_gtest_main.cc + paddle::framework::LoDTensor words; + auto& srcdata = inputdatas[j]; + paddle::framework::LoD lod{{0, srcdata.size()}}; + words.set_lod(lod); + int64_t* pdata = words.mutable_data( + {static_cast(srcdata.size()), 1}, + paddle::platform::CPUPlace()); + memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); + + LOG(INFO) << "thread id: " << i << ", words size:" << words.numel(); + std::vector cpu_feeds; + cpu_feeds.push_back(&words); + + paddle::framework::LoDTensor output1; + std::vector cpu_fetchs1; + cpu_fetchs1.push_back(&output1); + + // Run inference on CPU + if (FLAGS_prepare_vars) { + if (FLAGS_prepare_context) { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } else { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } + } else { + if (FLAGS_prepare_context) { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } else { + TestInference( + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, + FLAGS_use_mkldnn); + } + } + //LOG(INFO) << output1.lod(); + //LOG(INFO) << output1.dims(); + } + })); + } + auto start_ms = get_current_ms(); + for (int i = 0; i < FLAGS_num_threads; ++i) { + infer_threads[i]->join(); + } + auto stop_ms = get_current_ms(); + LOG(INFO) << "total: " << stop_ms - start_ms << " ms";*/ } -TEST(inference, understand_sentiment) { +TEST(inference, nlp) { if (FLAGS_dirname.empty()) { LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model"; } - std::vector> inputdatas; - read_data(&inputdatas); - LOG(INFO) << "---------- dataset size: " << inputdatas.size(); LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl; std::string dirname = FLAGS_dirname; + std::vector datasets; + size_t num_total_words = + read_datasets(&datasets, "/home/tangjian/paddle-tj/out.ids.txt"); + LOG(INFO) << "Number of dataset samples(seq len<1024): " << datasets.size(); + LOG(INFO) << "Total number of words: " << num_total_words; + const bool model_combined = false; - int total_work = 10; - int num_threads = 2; - int work_per_thread = total_work / num_threads; - std::vector> infer_threads; - for (int i = 0; i < num_threads; ++i) { - infer_threads.emplace_back(new std::thread([&, i]() { - for (int j = 0; j < work_per_thread; ++j) { - // 0. 
Call `paddle::framework::InitDevices()` initialize all the devices - // In unittests, this is done in paddle/testing/paddle_gtest_main.cc - paddle::framework::LoDTensor words; - /* - paddle::framework::LoD lod{{0, 83}}; - int64_t word_dict_len = 198392; - SetupLoDTensor(&words, lod, static_cast(0), - static_cast(word_dict_len - 1)); - */ - std::vector srcdata{ - 784, 784, 1550, 6463, 56, 75693, 6189, 784, 784, - 1550, 198391, 6463, 42468, 4376, 10251, 10760, 6189, 297, - 396, 6463, 6463, 1550, 198391, 6463, 22564, 1612, 291, - 68, 164, 784, 784, 1550, 198391, 6463, 13659, 3362, - 42468, 6189, 2209, 198391, 6463, 2209, 2209, 198391, 6463, - 2209, 1062, 3029, 1831, 3029, 1065, 2281, 100, 11216, - 1110, 56, 10869, 9811, 100, 198391, 6463, 100, 9280, - 100, 288, 40031, 1680, 1335, 100, 1550, 9280, 7265, - 244, 1550, 198391, 6463, 1550, 198391, 6463, 42468, 4376, - 10251, 10760}; - paddle::framework::LoD lod{{0, srcdata.size()}}; - words.set_lod(lod); - int64_t* pdata = words.mutable_data( - {static_cast(srcdata.size()), 1}, - paddle::platform::CPUPlace()); - memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); - - LOG(INFO) << "number of input size:" << words.numel(); - std::vector cpu_feeds; - cpu_feeds.push_back(&words); - - paddle::framework::LoDTensor output1; - std::vector cpu_fetchs1; - cpu_fetchs1.push_back(&output1); - - // Run inference on CPU - if (FLAGS_prepare_vars) { - if (FLAGS_prepare_context) { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } else { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } - } else { - if (FLAGS_prepare_context) { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } else { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } - } - LOG(INFO) << output1.lod(); - LOG(INFO) << output1.dims(); - } - })); + + // 0. Call `paddle::framework::InitDevices()` initialize all the devices + // 1. Define place, executor, scope + auto place = paddle::platform::CPUPlace(); + auto executor = paddle::framework::Executor(place); + auto* scope = new paddle::framework::Scope(); + + // 2. 
Initialize the inference_program and load parameters + std::unique_ptr inference_program; + inference_program = InitProgram(&executor, scope, dirname, model_combined); + if (FLAGS_use_mkldnn) { + EnableMKLDNN(inference_program); } - auto start_ms = get_current_ms(); - for (int i = 0; i < num_threads; ++i) { - infer_threads[i]->join(); + + if (FLAGS_num_threads > 1) { + test_multi_threads(); + } else { + if (FLAGS_prepare_vars) { + executor.CreateVariables(*inference_program, scope, 0); + } + // always prepare context and burning first time + std::unique_ptr ctx; + ctx = executor.Prepare(*inference_program, 0); + + // preapre fetch + const std::vector& fetch_target_names = + inference_program->GetFetchTargetNames(); + PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL); + std::map fetch_targets; + paddle::framework::LoDTensor outtensor; + fetch_targets[fetch_target_names[0]] = &outtensor; + + // prepare feed + const std::vector& feed_target_names = + inference_program->GetFeedTargetNames(); + PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL); + std::map feed_targets; + + // for data and run + auto start_ms = get_current_ms(); + for (size_t i = 0; i < datasets.size(); ++i) { + feed_targets[feed_target_names[0]] = &(datasets[i]); + executor.RunPreparedContext(ctx.get(), scope, &feed_targets, + &fetch_targets, !FLAGS_prepare_vars); + } + auto stop_ms = get_current_ms(); + LOG(INFO) << "Total infer time: " << (stop_ms - start_ms) / 1000.0 / 60 + << " min, avg time per seq: " + << (stop_ms - start_ms) / datasets.size() << " ms"; } - auto stop_ms = get_current_ms(); - LOG(INFO) << "total: " << stop_ms - start_ms << " ms"; + delete scope; } From d13dd3b6a7ee81d4c106035ec0bad2c581ea795c Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 31 May 2018 16:04:47 +0800 Subject: [PATCH 08/19] revert profiling --- paddle/fluid/inference/tests/test_helper.h | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index dd3a7a584a..1f5551567c 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -156,10 +156,27 @@ void TestInference(const std::string& dirname, auto executor = paddle::framework::Executor(place); auto* scope = new paddle::framework::Scope(); + // Profile the performance + paddle::platform::ProfilerState state; + if (paddle::platform::is_cpu_place(place)) { + state = paddle::platform::ProfilerState::kCPU; + } else { +#ifdef PADDLE_WITH_CUDA + state = paddle::platform::ProfilerState::kAll; + // The default device_id of paddle::platform::CUDAPlace is 0. + // Users can get the device_id using: + // int device_id = place.GetDeviceId(); + paddle::platform::SetDeviceId(0); +#else + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); +#endif + } + // 2. Initialize the inference_program and load parameters std::unique_ptr inference_program; // Enable the profiler + paddle::platform::EnableProfiler(state); { paddle::platform::RecordEvent record_event( "init_program", @@ -172,6 +189,10 @@ void TestInference(const std::string& dirname, EnableMKLDNN(inference_program); } } + // Disable the profiler and print the timing information + paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault, + "load_program_profiler"); + paddle::platform::ResetProfiler(); // 3. 
Get the feed_target_names and fetch_target_names const std::vector& feed_target_names = @@ -212,6 +233,9 @@ void TestInference(const std::string& dirname, true, CreateVars); } + // Enable the profiler + paddle::platform::EnableProfiler(state); + // Run repeat times to profile the performance for (int i = 0; i < repeat; ++i) { paddle::platform::RecordEvent record_event( @@ -228,6 +252,11 @@ void TestInference(const std::string& dirname, CreateVars); } } + + // Disable the profiler and print the timing information + paddle::platform::DisableProfiler( + paddle::platform::EventSortingKey::kDefault, "run_inference_profiler"); + paddle::platform::ResetProfiler(); } delete scope; From 708bec2e56c6a856f628ad8b650b0bf04a3df975 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 31 May 2018 16:33:54 +0800 Subject: [PATCH 09/19] add test --- paddle/fluid/inference/tests/book/test_inference_nlp.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index e216e9dbe6..990d45964e 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -200,6 +200,14 @@ TEST(inference, nlp) { LOG(INFO) << "Total infer time: " << (stop_ms - start_ms) / 1000.0 / 60 << " min, avg time per seq: " << (stop_ms - start_ms) / datasets.size() << " ms"; + { // just for test + auto* scope = new paddle::framework::Scope(); + paddle::framework::LoDTensor outtensor; + TestInference( + dirname, {&(datasets[0])}, {&outtensor}, FLAGS_repeat, model_combined, + false); + delete scope; + } } delete scope; } From 733718c3e724fdd84355010e76ddd17e5b60ef2c Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 31 May 2018 19:12:23 +0800 Subject: [PATCH 10/19] remove the ugly test --- .../inference/tests/book/test_inference_nlp.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 990d45964e..5241661fb3 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -53,7 +53,7 @@ size_t read_datasets(std::vector* out, while (getline(iss, field, ' ')) { ids.push_back(stoi(field)); } - if (ids.size() >= 1024 || out->size() >= 100) { + if (ids.size() >= 1024 ) { continue; } @@ -200,14 +200,14 @@ TEST(inference, nlp) { LOG(INFO) << "Total infer time: " << (stop_ms - start_ms) / 1000.0 / 60 << " min, avg time per seq: " << (stop_ms - start_ms) / datasets.size() << " ms"; - { // just for test - auto* scope = new paddle::framework::Scope(); - paddle::framework::LoDTensor outtensor; - TestInference( - dirname, {&(datasets[0])}, {&outtensor}, FLAGS_repeat, model_combined, - false); - delete scope; - } +// { // just for test +// auto* scope = new paddle::framework::Scope(); +// paddle::framework::LoDTensor outtensor; +// TestInference( +// dirname, {&(datasets[0])}, {&outtensor}, FLAGS_repeat, model_combined, +// false); +// delete scope; +// } } delete scope; } From 5387562576de020a35f864a07f14802b68ee398d Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 1 Jun 2018 14:07:41 +0800 Subject: [PATCH 11/19] add multi-thread test --- .../tests/book/test_inference_nlp.cc | 157 ++++++++---------- 1 file changed, 72 insertions(+), 85 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc 
b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 5241661fb3..4e92d6a17b 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -15,11 +15,7 @@ limitations under the License. */ #include #include #include -#include -#include -#include #include // NOLINT -#include #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" @@ -41,19 +37,18 @@ inline double get_current_ms() { // return size of total words size_t read_datasets(std::vector* out, const std::string& filename) { - using namespace std; // NOLINT size_t sz = 0; - fstream fin(filename); - string line; + std::fstream fin(filename); + std::string line; out->clear(); while (getline(fin, line)) { - istringstream iss(line); - vector ids; - string field; + std::istringstream iss(line); + std::vector ids; + std::string field; while (getline(iss, field, ' ')) { ids.push_back(stoi(field)); } - if (ids.size() >= 1024 ) { + if (ids.size() >= 1024) { continue; } @@ -69,72 +64,61 @@ size_t read_datasets(std::vector* out, return sz; } -void test_multi_threads() { - /* - size_t jobs_per_thread = std::min(inputdatas.size() / FLAGS_num_threads, - inputdatas.size()); - std::vector workers(FLAGS_num_threads, jobs_per_thread); - workers[FLAGS_num_threads - 1] += inputdatas.size() % FLAGS_num_threads; - - std::vector> infer_threads; - - for (size_t i = 0; i < workers.size(); ++i) { - infer_threads.emplace_back(new std::thread([&, i]() { - size_t start = i * jobs_per_thread; - for (size_t j = start; j < start + workers[i]; ++j ) { - // 0. Call `paddle::framework::InitDevices()` initialize all the - devices - // In unittests, this is done in paddle/testing/paddle_gtest_main.cc - paddle::framework::LoDTensor words; - auto& srcdata = inputdatas[j]; - paddle::framework::LoD lod{{0, srcdata.size()}}; - words.set_lod(lod); - int64_t* pdata = words.mutable_data( - {static_cast(srcdata.size()), 1}, - paddle::platform::CPUPlace()); - memcpy(pdata, srcdata.data(), words.numel() * sizeof(int64_t)); - - LOG(INFO) << "thread id: " << i << ", words size:" << words.numel(); - std::vector cpu_feeds; - cpu_feeds.push_back(&words); - - paddle::framework::LoDTensor output1; - std::vector cpu_fetchs1; - cpu_fetchs1.push_back(&output1); - - // Run inference on CPU - if (FLAGS_prepare_vars) { - if (FLAGS_prepare_context) { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } else { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } - } else { - if (FLAGS_prepare_context) { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } else { - TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, model_combined, - FLAGS_use_mkldnn); - } - } - //LOG(INFO) << output1.lod(); - //LOG(INFO) << output1.dims(); - } - })); - } - auto start_ms = get_current_ms(); - for (int i = 0; i < FLAGS_num_threads; ++i) { - infer_threads[i]->join(); +void ThreadRunInfer( + const int tid, paddle::framework::Executor* executor, + paddle::framework::Scope* scope, + const std::unique_ptr& inference_program, + const std::vector>& jobs) { + auto copy_program = std::unique_ptr( + new paddle::framework::ProgramDesc(*inference_program)); + std::string feed_holder_name = "feed_" + paddle::string::to_string(tid); + std::string fetch_holder_name = "fetch_" + paddle::string::to_string(tid); + 
copy_program->SetFeedHolderName(feed_holder_name); + copy_program->SetFetchHolderName(fetch_holder_name); + + // 3. Get the feed_target_names and fetch_target_names + const std::vector& feed_target_names = + copy_program->GetFeedTargetNames(); + const std::vector& fetch_target_names = + copy_program->GetFetchTargetNames(); + + PADDLE_ENFORCE_EQ(fetch_target_names.size(), 1UL); + std::map fetch_targets; + paddle::framework::LoDTensor outtensor; + fetch_targets[fetch_target_names[0]] = &outtensor; + + std::map feed_targets; + PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL); + + auto& inputs = jobs[tid]; + auto start_ms = get_current_ms(); + for (size_t i = 0; i < inputs.size(); ++i) { + feed_targets[feed_target_names[0]] = inputs[i]; + executor->Run(*copy_program, scope, &feed_targets, &fetch_targets, true, + true, feed_holder_name, fetch_holder_name); + } + auto stop_ms = get_current_ms(); + LOG(INFO) << "Tid: " << tid << ", process " << inputs.size() + << " samples, avg time per sample: " + + << (stop_ms - start_ms) / inputs.size() << " ms"; +} + +void bcast_datasets( + const std::vector& datasets, + std::vector>* jobs, + const int num_threads) { + size_t s = 0; + jobs->resize(num_threads); + while (s < datasets.size()) { + for (auto it = jobs->begin(); it != jobs->end(); it++) { + it->emplace_back(&datasets[s]); + s++; + if (s >= datasets.size()) { + break; + } } - auto stop_ms = get_current_ms(); - LOG(INFO) << "total: " << stop_ms - start_ms << " ms";*/ + } } TEST(inference, nlp) { @@ -166,7 +150,18 @@ TEST(inference, nlp) { } if (FLAGS_num_threads > 1) { - test_multi_threads(); + std::vector> jobs; + bcast_datasets(datasets, &jobs, FLAGS_num_threads); + std::vector> threads; + for (int i = 0; i < FLAGS_num_threads; ++i) { + threads.emplace_back(new std::thread(ThreadRunInfer, i, &executor, scope, + std::ref(inference_program), + std::ref(jobs))); + } + for (int i = 0; i < FLAGS_num_threads; ++i) { + threads[i]->join(); + } + } else { if (FLAGS_prepare_vars) { executor.CreateVariables(*inference_program, scope, 0); @@ -200,14 +195,6 @@ TEST(inference, nlp) { LOG(INFO) << "Total infer time: " << (stop_ms - start_ms) / 1000.0 / 60 << " min, avg time per seq: " << (stop_ms - start_ms) / datasets.size() << " ms"; -// { // just for test -// auto* scope = new paddle::framework::Scope(); -// paddle::framework::LoDTensor outtensor; -// TestInference( -// dirname, {&(datasets[0])}, {&outtensor}, FLAGS_repeat, model_combined, -// false); -// delete scope; -// } } delete scope; } From a4822ed897cebe6a27bd61d82c5a1b43022d3760 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 1 Jun 2018 14:37:35 +0800 Subject: [PATCH 12/19] add thread setting --- .../tests/book/test_inference_nlp.cc | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 4e92d6a17b..fba64efece 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -19,6 +19,10 @@ limitations under the License. 
*/ #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" +#ifdef PADDLE_WITH_MKLML +#include +#include +#endif DEFINE_string(dirname, "", "Directory of the inference model."); DEFINE_int32(repeat, 100, "Running the inference program repeat times"); @@ -149,6 +153,14 @@ TEST(inference, nlp) { EnableMKLDNN(inference_program); } +#ifdef PADDLE_WITH_MKLML + // only use 1 core per thread + omp_set_dynamic(0); + omp_set_num_threads(1); + mkl_set_num_threads(1); +#endif + + double start_ms = 0, stop_ms = 0; if (FLAGS_num_threads > 1) { std::vector> jobs; bcast_datasets(datasets, &jobs, FLAGS_num_threads); @@ -158,9 +170,11 @@ TEST(inference, nlp) { std::ref(inference_program), std::ref(jobs))); } + start_ms = get_current_ms(); for (int i = 0; i < FLAGS_num_threads; ++i) { threads[i]->join(); } + stop_ms = get_current_ms(); } else { if (FLAGS_prepare_vars) { @@ -185,16 +199,18 @@ TEST(inference, nlp) { std::map feed_targets; // for data and run - auto start_ms = get_current_ms(); + start_ms = get_current_ms(); for (size_t i = 0; i < datasets.size(); ++i) { feed_targets[feed_target_names[0]] = &(datasets[i]); executor.RunPreparedContext(ctx.get(), scope, &feed_targets, &fetch_targets, !FLAGS_prepare_vars); } - auto stop_ms = get_current_ms(); - LOG(INFO) << "Total infer time: " << (stop_ms - start_ms) / 1000.0 / 60 - << " min, avg time per seq: " - << (stop_ms - start_ms) / datasets.size() << " ms"; + stop_ms = get_current_ms(); } + + LOG(INFO) << "Total inference time with " << FLAGS_num_threads + << " threads : " << (stop_ms - start_ms) / 1000.0 + << " sec, avg time per seq: " + << (stop_ms - start_ms) / datasets.size() << " ms"; delete scope; } From 4a24c238c15212dd921bd0199beca6fc145cd62a Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 1 Jun 2018 15:43:34 +0800 Subject: [PATCH 13/19] refine code --- paddle/fluid/inference/io.cc | 2 +- .../tests/book/test_inference_nlp.cc | 86 +++++++++---------- paddle/fluid/inference/tests/test_helper.h | 3 - 3 files changed, 42 insertions(+), 49 deletions(-) diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index 98780b6881..65db7c7b50 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -117,7 +117,7 @@ std::unique_ptr Load(framework::Executor* executor, std::string program_desc_str; VLOG(3) << "loading model from " << model_filename; ReadBinaryFile(model_filename, &program_desc_str); - // LOG(INFO) << program_desc_str; + std::unique_ptr main_program( new framework::ProgramDesc(program_desc_str)); diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index fba64efece..962358d761 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -24,23 +24,22 @@ limitations under the License. 
*/ #include #endif -DEFINE_string(dirname, "", "Directory of the inference model."); +DEFINE_string(modelpath, "", "Directory of the inference model."); +DEFINE_string(datafile, "", "File of input index data."); DEFINE_int32(repeat, 100, "Running the inference program repeat times"); DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); -DEFINE_bool(prepare_context, true, "Prepare Context before executor"); - DEFINE_int32(num_threads, 1, "Number of threads should be used"); -inline double get_current_ms() { +inline double GetCurrentMs() { struct timeval time; gettimeofday(&time, NULL); return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec; } // return size of total words -size_t read_datasets(std::vector* out, - const std::string& filename) { +size_t LoadData(std::vector* out, + const std::string& filename) { size_t sz = 0; std::fstream fin(filename); std::string line; @@ -68,6 +67,23 @@ size_t read_datasets(std::vector* out, return sz; } +void SplitData( + const std::vector& datasets, + std::vector>* jobs, + const int num_threads) { + size_t s = 0; + jobs->resize(num_threads); + while (s < datasets.size()) { + for (auto it = jobs->begin(); it != jobs->end(); it++) { + it->emplace_back(&datasets[s]); + s++; + if (s >= datasets.size()) { + break; + } + } + } +} + void ThreadRunInfer( const int tid, paddle::framework::Executor* executor, paddle::framework::Scope* scope, @@ -80,7 +96,6 @@ void ThreadRunInfer( copy_program->SetFeedHolderName(feed_holder_name); copy_program->SetFetchHolderName(fetch_holder_name); - // 3. Get the feed_target_names and fetch_target_names const std::vector& feed_target_names = copy_program->GetFeedTargetNames(); const std::vector& fetch_target_names = @@ -95,51 +110,32 @@ void ThreadRunInfer( PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL); auto& inputs = jobs[tid]; - auto start_ms = get_current_ms(); + auto start_ms = GetCurrentMs(); for (size_t i = 0; i < inputs.size(); ++i) { feed_targets[feed_target_names[0]] = inputs[i]; executor->Run(*copy_program, scope, &feed_targets, &fetch_targets, true, true, feed_holder_name, fetch_holder_name); } - auto stop_ms = get_current_ms(); + auto stop_ms = GetCurrentMs(); LOG(INFO) << "Tid: " << tid << ", process " << inputs.size() << " samples, avg time per sample: " - << (stop_ms - start_ms) / inputs.size() << " ms"; } -void bcast_datasets( - const std::vector& datasets, - std::vector>* jobs, - const int num_threads) { - size_t s = 0; - jobs->resize(num_threads); - while (s < datasets.size()) { - for (auto it = jobs->begin(); it != jobs->end(); it++) { - it->emplace_back(&datasets[s]); - s++; - if (s >= datasets.size()) { - break; - } - } - } -} - TEST(inference, nlp) { - if (FLAGS_dirname.empty()) { - LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model"; + if (FLAGS_modelpath.empty() || FLAGS_datafile.empty()) { + LOG(FATAL) << "Usage: ./example --modelpath=path/to/your/model " + << "--datafile=path/to/your/data"; } - LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl; - std::string dirname = FLAGS_dirname; + LOG(INFO) << "Model Path: " << FLAGS_modelpath; + LOG(INFO) << "Data File: " << FLAGS_datafile; std::vector datasets; - size_t num_total_words = - read_datasets(&datasets, "/home/tangjian/paddle-tj/out.ids.txt"); - LOG(INFO) << "Number of dataset samples(seq len<1024): " << datasets.size(); + size_t num_total_words = LoadData(&datasets, FLAGS_datafile); + LOG(INFO) << "Number of samples (seq_len<1024): " << datasets.size(); 
LOG(INFO) << "Total number of words: " << num_total_words; const bool model_combined = false; - // 0. Call `paddle::framework::InitDevices()` initialize all the devices // 1. Define place, executor, scope auto place = paddle::platform::CPUPlace(); @@ -148,13 +144,14 @@ TEST(inference, nlp) { // 2. Initialize the inference_program and load parameters std::unique_ptr inference_program; - inference_program = InitProgram(&executor, scope, dirname, model_combined); + inference_program = + InitProgram(&executor, scope, FLAGS_modelpath, model_combined); if (FLAGS_use_mkldnn) { EnableMKLDNN(inference_program); } #ifdef PADDLE_WITH_MKLML - // only use 1 core per thread + // only use 1 thread number per std::thread omp_set_dynamic(0); omp_set_num_threads(1); mkl_set_num_threads(1); @@ -163,24 +160,23 @@ TEST(inference, nlp) { double start_ms = 0, stop_ms = 0; if (FLAGS_num_threads > 1) { std::vector> jobs; - bcast_datasets(datasets, &jobs, FLAGS_num_threads); + SplitData(datasets, &jobs, FLAGS_num_threads); std::vector> threads; for (int i = 0; i < FLAGS_num_threads; ++i) { threads.emplace_back(new std::thread(ThreadRunInfer, i, &executor, scope, std::ref(inference_program), std::ref(jobs))); } - start_ms = get_current_ms(); + start_ms = GetCurrentMs(); for (int i = 0; i < FLAGS_num_threads; ++i) { threads[i]->join(); } - stop_ms = get_current_ms(); - + stop_ms = GetCurrentMs(); } else { if (FLAGS_prepare_vars) { executor.CreateVariables(*inference_program, scope, 0); } - // always prepare context and burning first time + // always prepare context std::unique_ptr ctx; ctx = executor.Prepare(*inference_program, 0); @@ -198,14 +194,14 @@ TEST(inference, nlp) { PADDLE_ENFORCE_EQ(feed_target_names.size(), 1UL); std::map feed_targets; - // for data and run - start_ms = get_current_ms(); + // feed data and run + start_ms = GetCurrentMs(); for (size_t i = 0; i < datasets.size(); ++i) { feed_targets[feed_target_names[0]] = &(datasets[i]); executor.RunPreparedContext(ctx.get(), scope, &feed_targets, &fetch_targets, !FLAGS_prepare_vars); } - stop_ms = get_current_ms(); + stop_ms = GetCurrentMs(); } LOG(INFO) << "Total inference time with " << FLAGS_num_threads diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index 1f5551567c..01b8dc0be6 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -182,9 +182,6 @@ void TestInference(const std::string& dirname, "init_program", paddle::platform::DeviceContextPool::Instance().Get(place)); inference_program = InitProgram(&executor, scope, dirname, is_combined); - // std::string binary_str; - // inference_program->Proto()->SerializeToString(&binary_str); - // LOG(INFO) << binary_str; if (use_mkldnn) { EnableMKLDNN(inference_program); } From 3206bcd9291833518289e73e37513cdbc29e96c7 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 1 Jun 2018 16:24:55 +0800 Subject: [PATCH 14/19] refine log and add QPS --- paddle/fluid/inference/tests/book/test_inference_nlp.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 962358d761..378e1620a0 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -202,11 +202,13 @@ TEST(inference, nlp) { &fetch_targets, !FLAGS_prepare_vars); } stop_ms = GetCurrentMs(); + LOG(INFO) << "Tid: 0, process " << datasets.size() + << 
" samples, avg time per sample: " + << (stop_ms - start_ms) / datasets.size() << " ms"; } LOG(INFO) << "Total inference time with " << FLAGS_num_threads << " threads : " << (stop_ms - start_ms) / 1000.0 - << " sec, avg time per seq: " - << (stop_ms - start_ms) / datasets.size() << " ms"; + << " sec, QPS: " << datasets.size() / ((stop_ms - start_ms) / 1000); delete scope; } From 7e9f0790e0366ef8db3f48f83635400d4742ad71 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 1 Jun 2018 17:24:54 +0800 Subject: [PATCH 15/19] fix scope in thread --- paddle/fluid/inference/tests/book/test_inference_nlp.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 378e1620a0..f7788ccbf4 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -91,6 +91,8 @@ void ThreadRunInfer( const std::vector>& jobs) { auto copy_program = std::unique_ptr( new paddle::framework::ProgramDesc(*inference_program)); + auto& sub_scope = scope->NewScope(); + std::string feed_holder_name = "feed_" + paddle::string::to_string(tid); std::string fetch_holder_name = "fetch_" + paddle::string::to_string(tid); copy_program->SetFeedHolderName(feed_holder_name); @@ -113,10 +115,11 @@ void ThreadRunInfer( auto start_ms = GetCurrentMs(); for (size_t i = 0; i < inputs.size(); ++i) { feed_targets[feed_target_names[0]] = inputs[i]; - executor->Run(*copy_program, scope, &feed_targets, &fetch_targets, true, - true, feed_holder_name, fetch_holder_name); + executor->Run(*copy_program, &sub_scope, &feed_targets, &fetch_targets, + true, true, feed_holder_name, fetch_holder_name); } auto stop_ms = GetCurrentMs(); + scope->DeleteScope(&sub_scope); LOG(INFO) << "Tid: " << tid << ", process " << inputs.size() << " samples, avg time per sample: " << (stop_ms - start_ms) / inputs.size() << " ms"; From eaeb76c419fbad9b7d3dd083666f80d84f89f55f Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 1 Jun 2018 19:35:49 +0800 Subject: [PATCH 16/19] add some comments --- .../tests/book/test_inference_nlp.cc | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index f7788ccbf4..c4d7b0bbf0 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -37,7 +37,8 @@ inline double GetCurrentMs() { return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec; } -// return size of total words +// Load the input word index data from file and save into LodTensor. +// Return the size of words. size_t LoadData(std::vector* out, const std::string& filename) { size_t sz = 0; @@ -67,6 +68,8 @@ size_t LoadData(std::vector* out, return sz; } +// Split input data samples into small pieces jobs as balanced as possible, +// according to the number of threads. 
void SplitData( const std::vector& datasets, std::vector>* jobs, @@ -116,7 +119,8 @@ void ThreadRunInfer( for (size_t i = 0; i < inputs.size(); ++i) { feed_targets[feed_target_names[0]] = inputs[i]; executor->Run(*copy_program, &sub_scope, &feed_targets, &fetch_targets, - true, true, feed_holder_name, fetch_holder_name); + true /*create_local_scope*/, true /*create_vars*/, + feed_holder_name, fetch_holder_name); } auto stop_ms = GetCurrentMs(); scope->DeleteScope(&sub_scope); @@ -143,12 +147,13 @@ TEST(inference, nlp) { // 1. Define place, executor, scope auto place = paddle::platform::CPUPlace(); auto executor = paddle::framework::Executor(place); - auto* scope = new paddle::framework::Scope(); + std::unique_ptr scope( + new paddle::framework::Scope()); // 2. Initialize the inference_program and load parameters std::unique_ptr inference_program; inference_program = - InitProgram(&executor, scope, FLAGS_modelpath, model_combined); + InitProgram(&executor, scope.get(), FLAGS_modelpath, model_combined); if (FLAGS_use_mkldnn) { EnableMKLDNN(inference_program); } @@ -166,9 +171,9 @@ TEST(inference, nlp) { SplitData(datasets, &jobs, FLAGS_num_threads); std::vector> threads; for (int i = 0; i < FLAGS_num_threads; ++i) { - threads.emplace_back(new std::thread(ThreadRunInfer, i, &executor, scope, - std::ref(inference_program), - std::ref(jobs))); + threads.emplace_back( + new std::thread(ThreadRunInfer, i, &executor, scope.get(), + std::ref(inference_program), std::ref(jobs))); } start_ms = GetCurrentMs(); for (int i = 0; i < FLAGS_num_threads; ++i) { @@ -177,7 +182,7 @@ TEST(inference, nlp) { stop_ms = GetCurrentMs(); } else { if (FLAGS_prepare_vars) { - executor.CreateVariables(*inference_program, scope, 0); + executor.CreateVariables(*inference_program, scope.get(), 0); } // always prepare context std::unique_ptr ctx; @@ -201,7 +206,7 @@ TEST(inference, nlp) { start_ms = GetCurrentMs(); for (size_t i = 0; i < datasets.size(); ++i) { feed_targets[feed_target_names[0]] = &(datasets[i]); - executor.RunPreparedContext(ctx.get(), scope, &feed_targets, + executor.RunPreparedContext(ctx.get(), scope.get(), &feed_targets, &fetch_targets, !FLAGS_prepare_vars); } stop_ms = GetCurrentMs(); @@ -209,9 +214,7 @@ TEST(inference, nlp) { << " samples, avg time per sample: " << (stop_ms - start_ms) / datasets.size() << " ms"; } - LOG(INFO) << "Total inference time with " << FLAGS_num_threads << " threads : " << (stop_ms - start_ms) / 1000.0 << " sec, QPS: " << datasets.size() / ((stop_ms - start_ms) / 1000); - delete scope; } From 38f8182df63d33ff619297d95f5a4431bf8d5362 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 1 Jun 2018 20:41:18 +0800 Subject: [PATCH 17/19] work around with dummy test --- .../fluid/inference/tests/book/CMakeLists.txt | 8 ++++++- .../tests/book/test_inference_nlp.cc | 21 ++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/inference/tests/book/CMakeLists.txt b/paddle/fluid/inference/tests/book/CMakeLists.txt index 90357f99d1..b33df2942a 100644 --- a/paddle/fluid/inference/tests/book/CMakeLists.txt +++ b/paddle/fluid/inference/tests/book/CMakeLists.txt @@ -35,7 +35,13 @@ inference_test(image_classification ARGS vgg resnet) inference_test(label_semantic_roles) inference_test(recognize_digits ARGS mlp conv) inference_test(recommender_system) -inference_test(nlp) #inference_test(rnn_encoder_decoder) #inference_test(understand_sentiment ARGS conv) inference_test(word2vec) + +# This is an unly work around to make this test run 
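+# (it pairs the recognize_digits MLP model from the python tests with the
+# dummy-data fallback added below, so the binary can still execute when no
+# real NLP model or data file is supplied)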
+cc_test(test_inference_nlp
+  SRCS test_inference_nlp.cc
+  DEPS paddle_fluid
+  ARGS
+  --modelpath=${PADDLE_BINARY_DIR}/python/paddle/fluid/tests/book/recognize_digits_mlp.inference.model)
diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
index c4d7b0bbf0..5ece6084df 100644
--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -37,10 +37,22 @@ inline double GetCurrentMs() {
   return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
 }
 
+// This function just gives dummy data for the recognize_digits model.
+size_t DummyData(std::vector<paddle::framework::LoDTensor>* out) {
+  paddle::framework::LoDTensor input;
+  SetupTensor<float>(&input, {1, 1, 28, 28}, -1.f, 1.f);
+  out->emplace_back(input);
+  return 1;
+}
+
 // Load the input word index data from file and save into LoDTensor.
 // Return the size of words.
 size_t LoadData(std::vector<paddle::framework::LoDTensor>* out,
                 const std::string& filename) {
+  if (filename.empty()) {
+    return DummyData(out);
+  }
+
   size_t sz = 0;
   std::fstream fin(filename);
   std::string line;
@@ -130,9 +142,12 @@ void ThreadRunInfer(
 }
 
 TEST(inference, nlp) {
-  if (FLAGS_modelpath.empty() || FLAGS_datafile.empty()) {
-    LOG(FATAL) << "Usage: ./example --modelpath=path/to/your/model "
-               << "--datafile=path/to/your/data";
+  if (FLAGS_modelpath.empty()) {
+    LOG(FATAL) << "Usage: ./example --modelpath=path/to/your/model";
+  }
+  if (FLAGS_datafile.empty()) {
+    LOG(WARNING) << " Not data file provided, will use dummy data!"
+                 << " Note: if you use an nlp model, please provide a data file.";
   }
   LOG(INFO) << "Model Path: " << FLAGS_modelpath;
   LOG(INFO) << "Data File: " << FLAGS_datafile;

From 99d00cce9330dac56aac52788d7fba76d0137430 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Fri, 1 Jun 2018 21:04:51 +0800
Subject: [PATCH 18/19] follow comment: refine where time started

---
 paddle/fluid/inference/tests/book/test_inference_nlp.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
index 5ece6084df..c3bec27925 100644
--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -185,12 +185,12 @@ TEST(inference, nlp) {
     std::vector<std::vector<const paddle::framework::LoDTensor*>> jobs;
     SplitData(datasets, &jobs, FLAGS_num_threads);
     std::vector<std::unique_ptr<std::thread>> threads;
+    start_ms = GetCurrentMs();
     for (int i = 0; i < FLAGS_num_threads; ++i) {
       threads.emplace_back(
           new std::thread(ThreadRunInfer, i, &executor, scope.get(),
                           std::ref(inference_program), std::ref(jobs)));
     }
-    start_ms = GetCurrentMs();
     for (int i = 0; i < FLAGS_num_threads; ++i) {
       threads[i]->join();
     }

From 6ae7cbe252178e7bd3e5c3b7cde21581948b478f Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Mon, 4 Jun 2018 11:08:08 +0800
Subject: [PATCH 19/19] follow comments

---
 .../fluid/inference/tests/book/CMakeLists.txt |  3 ++-
 .../tests/book/test_inference_nlp.cc          | 21 ++++++++++---------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/paddle/fluid/inference/tests/book/CMakeLists.txt b/paddle/fluid/inference/tests/book/CMakeLists.txt
index b33df2942a..2fa5a9540b 100644
--- a/paddle/fluid/inference/tests/book/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/book/CMakeLists.txt
@@ -40,8 +40,9 @@ inference_test(recommender_system)
 inference_test(word2vec)
 
 # This is an ugly workaround to make this test run
+# TODO(TJ): clean me up
 cc_test(test_inference_nlp
   SRCS test_inference_nlp.cc
   DEPS paddle_fluid
   ARGS
-  --modelpath=${PADDLE_BINARY_DIR}/python/paddle/fluid/tests/book/recognize_digits_mlp.inference.model)
+  --model_path=${PADDLE_BINARY_DIR}/python/paddle/fluid/tests/book/recognize_digits_mlp.inference.model)
diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
index c3bec27925..70aa42ac41 100644
--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@@ -24,8 +24,8 @@ limitations under the License. */
 #include <omp.h>
 #endif
 
-DEFINE_string(modelpath, "", "Directory of the inference model.");
-DEFINE_string(datafile, "", "File of input index data.");
+DEFINE_string(model_path, "", "Directory of the inference model.");
+DEFINE_string(data_file, "", "File of input index data.");
 DEFINE_int32(repeat, 100, "Running the inference program repeat times");
 DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
 DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
@@ -65,6 +65,7 @@ size_t LoadData(std::vector<paddle::framework::LoDTensor>* out,
       ids.push_back(stoi(field));
     }
     if (ids.size() >= 1024) {
+      // Synced with the NLP team: they ignore inputs larger than 1024.
       continue;
     }
 
@@ -142,18 +143,18 @@ void ThreadRunInfer(
 }
 
 TEST(inference, nlp) {
-  if (FLAGS_modelpath.empty()) {
-    LOG(FATAL) << "Usage: ./example --modelpath=path/to/your/model";
+  if (FLAGS_model_path.empty()) {
+    LOG(FATAL) << "Usage: ./example --model_path=path/to/your/model";
   }
-  if (FLAGS_datafile.empty()) {
-    LOG(WARNING) << " Not data file provided, will use dummy data!"
+  if (FLAGS_data_file.empty()) {
+    LOG(WARNING) << "No data file provided, will use dummy data!"
                  << " Note: if you use an nlp model, please provide a data file.";
   }
-  LOG(INFO) << "Model Path: " << FLAGS_modelpath;
-  LOG(INFO) << "Data File: " << FLAGS_datafile;
+  LOG(INFO) << "Model Path: " << FLAGS_model_path;
+  LOG(INFO) << "Data File: " << FLAGS_data_file;
 
   std::vector<paddle::framework::LoDTensor> datasets;
-  size_t num_total_words = LoadData(&datasets, FLAGS_datafile);
+  size_t num_total_words = LoadData(&datasets, FLAGS_data_file);
   LOG(INFO) << "Number of samples (seq_len<1024): " << datasets.size();
   LOG(INFO) << "Total number of words: " << num_total_words;
 
@@ -168,7 +169,7 @@ TEST(inference, nlp) {
   // 2. Initialize the inference_program and load parameters
   std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
   inference_program =
-      InitProgram(&executor, scope.get(), FLAGS_modelpath, model_combined);
+      InitProgram(&executor, scope.get(), FLAGS_model_path, model_combined);
   if (FLAGS_use_mkldnn) {
     EnableMKLDNN(inference_program);
   }
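
A note on the scope fix in PATCH 15: sharing one Scope across worker threads means all threads create variables in the same container, so each thread instead takes its own sub-scope from the root (scope->NewScope()) and hands it back (scope->DeleteScope(&sub_scope)) once its job list is done. Below is a minimal plain-C++11 sketch of that ownership pattern; the Scope class here is an illustrative stand-in for paddle::framework::Scope, not Paddle code.

#include <cstdio>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>

// Illustrative stand-in: a scope that owns its children, so a child handed
// to a worker stays valid until the root is destroyed.
class Scope {
 public:
  Scope* NewChild() {
    std::lock_guard<std::mutex> lock(mu_);
    children_.emplace_back(new Scope());
    return children_.back().get();
  }

 private:
  std::mutex mu_;
  std::vector<std::unique_ptr<Scope>> children_;
};

// Each worker creates one child scope up front and runs all of its jobs in
// it, mirroring the sub_scope usage in ThreadRunInfer above.
void Worker(int tid, Scope* root, const std::vector<int>& jobs) {
  Scope* sub = root->NewChild();
  for (int job : jobs) {
    (void)sub;  // run `job` against the thread-local scope here
    std::printf("tid %d ran job %d\n", tid, job);
  }
}

int main() {
  Scope root;
  std::vector<std::vector<int>> jobs = {{0, 1}, {2, 3}};
  std::vector<std::thread> threads;
  for (int i = 0; i < 2; ++i) {
    threads.emplace_back([&, i] { Worker(i, &root, jobs[i]); });
  }
  for (auto& t : threads) {
    t.join();
  }
  return 0;
}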
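
PATCH 16 documents that SplitData distributes the input samples across threads as evenly as possible. One simple way to get that guarantee is round-robin assignment, where any two job lists differ in size by at most one; the sketch below is standalone C++ with an illustrative name (SplitEvenly), not the SplitData implementation itself.

#include <cstdio>
#include <vector>

// Round-robin split: with n samples and t threads, each job receives either
// n/t or n/t + 1 samples.
std::vector<std::vector<int>> SplitEvenly(const std::vector<int>& samples,
                                          size_t num_threads) {
  std::vector<std::vector<int>> jobs(num_threads);
  for (size_t i = 0; i < samples.size(); ++i) {
    jobs[i % num_threads].push_back(samples[i]);
  }
  return jobs;
}

int main() {
  std::vector<int> samples;
  for (int i = 0; i < 10; ++i) samples.push_back(i);
  const auto jobs = SplitEvenly(samples, 4);  // job sizes: 3, 3, 2, 2
  for (size_t t = 0; t < jobs.size(); ++t) {
    std::printf("thread %zu gets %zu samples\n", t, jobs[t].size());
  }
  return 0;
}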
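
On the timing convention used throughout the series: GetCurrentMs() returns wall-clock time in milliseconds built from gettimeofday, and the QPS log divides the sample count by the elapsed time converted to seconds. A self-contained sketch of that arithmetic follows; num_samples is a placeholder for datasets.size(), and the zero-elapsed guard is an addition not present in the test itself.

#include <sys/time.h>
#include <cstdio>

// Same convention as the series: wall-clock time in milliseconds.
inline double GetCurrentMs() {
  struct timeval time;
  gettimeofday(&time, NULL);
  return 1e+3 * time.tv_sec + 1e-3 * time.tv_usec;
}

int main() {
  const int num_samples = 100;  // placeholder for datasets.size()
  double start_ms = GetCurrentMs();
  // ... run inference over the num_samples inputs here ...
  double stop_ms = GetCurrentMs();
  double sec = (stop_ms - start_ms) / 1000.0;
  if (sec > 0) {  // avoid dividing by a zero elapsed time
    std::printf("QPS: %f\n", num_samples / sec);
  }
  return 0;
}

Because GetCurrentMs() returns double, the "/ 1000" in the patched QPS line is already floating-point division; writing 1000.0 would only make that explicit.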