From 8f2e556e65ceaf4e530bcbe0055b33b236c73e17 Mon Sep 17 00:00:00 2001 From: ZhenWang Date: Thu, 29 Nov 2018 19:33:47 +0800 Subject: [PATCH 1/5] support the small dam model. test=develop --- paddle/fluid/inference/tests/api/CMakeLists.txt | 4 +++- paddle/fluid/inference/tests/api/analyzer_dam_tester.cc | 9 ++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 7dc88d9dd0..b54693d948 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -48,7 +48,9 @@ inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2 # DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") -download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") +# For the normal DAM model +# download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") +download_model_and_data(${DAM_INSTALL_DIR} "small_dam_model.tar.gz" "small_dam_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc) # chinese_ner diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index b369cba5c8..b5a68538ab 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -17,7 +17,7 @@ namespace paddle { namespace inference { using contrib::AnalysisConfig; -#define MAX_TURN_NUM 9 +#define MAX_TURN_NUM 1 #define MAX_TURN_LEN 50 static std::vector result_data; @@ -148,8 +148,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } void SetConfig(contrib::AnalysisConfig *cfg) { - cfg->prog_file = FLAGS_infer_model + "/__model__"; - cfg->param_file = FLAGS_infer_model + "/param"; + cfg->model_dir = FLAGS_infer_model; cfg->use_gpu = false; cfg->device = 0; cfg->specify_input_name = true; @@ -202,8 +201,8 @@ TEST(Analyzer_dam, fuse_statis) { auto fuse_statis = GetFuseStatis( static_cast(predictor.get()), &num_ops); ASSERT_TRUE(fuse_statis.count("fc_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 317); - EXPECT_EQ(num_ops, 2020); + EXPECT_EQ(fuse_statis.at("fc_fuse"), 45); + EXPECT_EQ(num_ops, 292); } // Compare result of NativeConfig and AnalysisConfig From 33b49635055963def9ae057ef68259043934f48c Mon Sep 17 00:00:00 2001 From: ZhenWang Date: Thu, 29 Nov 2018 20:57:19 +0800 Subject: [PATCH 2/5] unify the normal and small dam model. --- .../fluid/inference/tests/api/CMakeLists.txt | 9 ++- .../tests/api/analyzer_dam_tester.cc | 76 +++++++++++-------- 2 files changed, 52 insertions(+), 33 deletions(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index b54693d948..ab45249f28 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -48,10 +48,13 @@ inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2 # DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") -# For the normal DAM model +# For the normal DAM model: download DAM_model.tar.gz and DAM_data.txt.tar.gz. # download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") -download_model_and_data(${DAM_INSTALL_DIR} "small_dam_model.tar.gz" "small_dam_data.txt.tar.gz") -inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc) +download_model_and_data(${DAM_INSTALL_DIR} "dam_small_model.tar.gz" "dam_small_data.txt.tar.gz") +# For the normal DAM model: --max_turn_num=9. +inference_analysis_test(test_analyzer_dam SRCS analyzer_dam_tester.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} + ARGS --infer_model=${DAM_INSTALL_DIR}/model --infer_data=${DAM_INSTALL_DIR}/data.txt --max_turn_num=1) # chinese_ner set(CHINESE_NER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/chinese_ner") diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index b5a68538ab..925f19417d 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -14,38 +14,54 @@ #include "paddle/fluid/inference/tests/api/tester_helper.h" +DEFINE_int32(max_turn_num, 1, + "The max turn number: 1 for the small and 9 for the normal."); + namespace paddle { namespace inference { using contrib::AnalysisConfig; -#define MAX_TURN_NUM 1 -#define MAX_TURN_LEN 50 + +constexpr int32_t kMaxTurnLen = 50; + static std::vector result_data; struct DataRecord { - std::vector> - turns[MAX_TURN_NUM]; // turns data : MAX_TURN_NUM - std::vector> - turns_mask[MAX_TURN_NUM]; // turns mask data : MAX_TURN_NUM - std::vector> response; // response data : 1 + std::vector> *turns; + std::vector> *turns_mask; + std::vector> response; // response data : 1 std::vector> response_mask; // response mask data : 1 size_t batch_iter{0}; size_t batch_size{1}; size_t num_samples; // total number of samples - DataRecord() = default; + + DataRecord() { + turns = new std::vector>[FLAGS_max_turn_num]; // turns data : FLAGS_max_turn_num + turns_mask = new std::vector>[FLAGS_max_turn_num]; // turns mask data : FLAGS_max_turn_num + } + explicit DataRecord(const std::string &path, int batch_size = 1) - : batch_size(batch_size) { + : DataRecord() { + this->batch_size = batch_size; Load(path); } + + ~DataRecord() { + delete[] turns; + delete[] turns_mask; + } + DataRecord NextBatch() { DataRecord data; size_t batch_end = batch_iter + batch_size; // NOTE skip the final batch, if no enough data is provided. if (batch_end <= response.size()) { - for (int i = 0; i < MAX_TURN_NUM; ++i) { + for (int i = 0; i < FLAGS_max_turn_num; ++i) { data.turns[i].assign(turns[i].begin() + batch_iter, turns[i].begin() + batch_end); } - for (int i = 0; i < MAX_TURN_NUM; ++i) { + for (int i = 0; i < FLAGS_max_turn_num; ++i) { data.turns_mask[i].assign(turns_mask[i].begin() + batch_iter, turns_mask[i].begin() + batch_end); } @@ -60,6 +76,7 @@ struct DataRecord { batch_iter += batch_size; return data; } + void Load(const std::string &path) { std::ifstream file(path); std::string line; @@ -69,30 +86,30 @@ struct DataRecord { num_lines++; std::vector data; split(line, ',', &data); - CHECK_EQ(data.size(), (size_t)(2 * MAX_TURN_NUM + 3)); + CHECK_EQ(data.size(), (size_t)(2 * FLAGS_max_turn_num + 3)); // load turn data - std::vector turns_tmp[MAX_TURN_NUM]; - for (int i = 0; i < MAX_TURN_NUM; ++i) { + std::vector turns_tmp[FLAGS_max_turn_num]; + for (int i = 0; i < FLAGS_max_turn_num; ++i) { split_to_int64(data[i], ' ', &turns_tmp[i]); turns[i].push_back(std::move(turns_tmp[i])); } // load turn_mask data - std::vector turns_mask_tmp[MAX_TURN_NUM]; - for (int i = 0; i < MAX_TURN_NUM; ++i) { - split_to_float(data[MAX_TURN_NUM + i], ' ', &turns_mask_tmp[i]); + std::vector turns_mask_tmp[FLAGS_max_turn_num]; + for (int i = 0; i < FLAGS_max_turn_num; ++i) { + split_to_float(data[FLAGS_max_turn_num + i], ' ', &turns_mask_tmp[i]); turns_mask[i].push_back(std::move(turns_mask_tmp[i])); } // load response data std::vector response_tmp; - split_to_int64(data[2 * MAX_TURN_NUM], ' ', &response_tmp); + split_to_int64(data[2 * FLAGS_max_turn_num], ' ', &response_tmp); response.push_back(std::move(response_tmp)); // load response_mask data std::vector response_mask_tmp; - split_to_float(data[2 * MAX_TURN_NUM + 1], ' ', &response_mask_tmp); + split_to_float(data[2 * FLAGS_max_turn_num + 1], ' ', &response_mask_tmp); response_mask.push_back(std::move(response_mask_tmp)); // load result data float result_tmp; - result_tmp = std::stof(data[2 * MAX_TURN_NUM + 2]); + result_tmp = std::stof(data[2 * FLAGS_max_turn_num + 2]); result_data.push_back(result_tmp); } num_samples = num_lines; @@ -101,8 +118,8 @@ struct DataRecord { void PrepareInputs(std::vector *input_slots, DataRecord *data, int batch_size) { - PaddleTensor turns_tensor[MAX_TURN_NUM]; - PaddleTensor turns_mask_tensor[MAX_TURN_NUM]; + PaddleTensor turns_tensor[FLAGS_max_turn_num]; + PaddleTensor turns_mask_tensor[FLAGS_max_turn_num]; PaddleTensor response_tensor; PaddleTensor response_mask_tensor; std::string turn_pre = "turn_"; @@ -110,16 +127,16 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, auto one_batch = data->NextBatch(); int size = one_batch.response[0].size(); - CHECK_EQ(size, MAX_TURN_LEN); + CHECK_EQ(size, kMaxTurnLen); // turn tensor assignment - for (int i = 0; i < MAX_TURN_NUM; ++i) { + for (int i = 0; i < FLAGS_max_turn_num; ++i) { turns_tensor[i].name = turn_pre + std::to_string(i); turns_tensor[i].shape.assign({batch_size, size, 1}); turns_tensor[i].dtype = PaddleDType::INT64; TensorAssignData(&turns_tensor[i], one_batch.turns[i]); } // turn mask tensor assignment - for (int i = 0; i < MAX_TURN_NUM; ++i) { + for (int i = 0; i < FLAGS_max_turn_num; ++i) { turns_mask_tensor[i].name = turn_mask_pre + std::to_string(i); turns_mask_tensor[i].shape.assign({batch_size, size, 1}); turns_mask_tensor[i].dtype = PaddleDType::FLOAT32; @@ -137,10 +154,10 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, TensorAssignData(&response_mask_tensor, one_batch.response_mask); // Set inputs. - for (int i = 0; i < MAX_TURN_NUM; ++i) { + for (int i = 0; i < FLAGS_max_turn_num; ++i) { input_slots->push_back(std::move(turns_tensor[i])); } - for (int i = 0; i < MAX_TURN_NUM; ++i) { + for (int i = 0; i < FLAGS_max_turn_num; ++i) { input_slots->push_back(std::move(turns_mask_tensor[i])); } input_slots->push_back(std::move(response_tensor)); @@ -148,7 +165,8 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } void SetConfig(contrib::AnalysisConfig *cfg) { - cfg->model_dir = FLAGS_infer_model; + cfg->prog_file = FLAGS_infer_model + "/__model__"; + cfg->param_file = FLAGS_infer_model + "/param"; cfg->use_gpu = false; cfg->device = 0; cfg->specify_input_name = true; @@ -201,8 +219,6 @@ TEST(Analyzer_dam, fuse_statis) { auto fuse_statis = GetFuseStatis( static_cast(predictor.get()), &num_ops); ASSERT_TRUE(fuse_statis.count("fc_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 45); - EXPECT_EQ(num_ops, 292); } // Compare result of NativeConfig and AnalysisConfig From d5947b0ed7b0b0d1365d40b12de7bc4fc099a2b4 Mon Sep 17 00:00:00 2001 From: ZhenWang Date: Thu, 29 Nov 2018 21:12:29 +0800 Subject: [PATCH 3/5] test=develop --- paddle/fluid/inference/tests/api/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index ab45249f28..22c9a8735b 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -51,7 +51,7 @@ set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") # For the normal DAM model: download DAM_model.tar.gz and DAM_data.txt.tar.gz. # download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") download_model_and_data(${DAM_INSTALL_DIR} "dam_small_model.tar.gz" "dam_small_data.txt.tar.gz") -# For the normal DAM model: --max_turn_num=9. +# For the normal DAM model: use --max_turn_num=9. inference_analysis_test(test_analyzer_dam SRCS analyzer_dam_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${DAM_INSTALL_DIR}/model --infer_data=${DAM_INSTALL_DIR}/data.txt --max_turn_num=1) From e1da6cd75407e5e72c3900ab2d098230b308e2e8 Mon Sep 17 00:00:00 2001 From: ZhenWang Date: Thu, 29 Nov 2018 21:33:49 +0800 Subject: [PATCH 4/5] add the normal dam and the small dam --- paddle/fluid/inference/tests/api/CMakeLists.txt | 16 +++++++++------- .../inference/tests/api/analyzer_dam_tester.cc | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 22c9a8735b..78c8ed1534 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -46,15 +46,17 @@ set(RNN2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn2") download_model_and_data(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2_tester.cc) -# DAM +# Normal DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") -# For the normal DAM model: download DAM_model.tar.gz and DAM_data.txt.tar.gz. -# download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") -download_model_and_data(${DAM_INSTALL_DIR} "dam_small_model.tar.gz" "dam_small_data.txt.tar.gz") -# For the normal DAM model: use --max_turn_num=9. -inference_analysis_test(test_analyzer_dam SRCS analyzer_dam_tester.cc +download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") +inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc) + +# Small DAM +set(DAM_SMALL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_dam") +download_model_and_data(${DAM_SMALL_INSTALL_DIR} "dam_small_model.tar.gz" "dam_small_data.txt.tar.gz") +inference_analysis_test(test_analyzer_small_dam SRCS analyzer_dam_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} - ARGS --infer_model=${DAM_INSTALL_DIR}/model --infer_data=${DAM_INSTALL_DIR}/data.txt --max_turn_num=1) + ARGS --infer_model=${DAM_SMALL_INSTALL_DIR}/model --infer_data=${DAM_SMALL_INSTALL_DIR}/data.txt --max_turn_num=1) # chinese_ner set(CHINESE_NER_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/chinese_ner") diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index 925f19417d..a3a6130db7 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -14,7 +14,7 @@ #include "paddle/fluid/inference/tests/api/tester_helper.h" -DEFINE_int32(max_turn_num, 1, +DEFINE_int32(max_turn_num, 9, "The max turn number: 1 for the small and 9 for the normal."); namespace paddle { From 6e48e47406cb3191d4a8a9901730bbe4e8e50cb7 Mon Sep 17 00:00:00 2001 From: ZhenWang Date: Thu, 29 Nov 2018 21:35:36 +0800 Subject: [PATCH 5/5] test=develop --- paddle/fluid/inference/tests/api/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 78c8ed1534..a07626a103 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -46,12 +46,12 @@ set(RNN2_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn2") download_model_and_data(${RNN2_INSTALL_DIR} "rnn2_model.tar.gz" "rnn2_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2_tester.cc) -# Normal DAM +# normal DAM set(DAM_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/dam") download_model_and_data(${DAM_INSTALL_DIR} "DAM_model.tar.gz" "DAM_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_dam ${DAM_INSTALL_DIR} analyzer_dam_tester.cc) -# Small DAM +# small DAM set(DAM_SMALL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_dam") download_model_and_data(${DAM_SMALL_INSTALL_DIR} "dam_small_model.tar.gz" "dam_small_data.txt.tar.gz") inference_analysis_test(test_analyzer_small_dam SRCS analyzer_dam_tester.cc