Paddle/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {
namespace analysis {

// diff: similarity_norm.tmp_0, for speed: fc_4.tmp_1
static const char out_var_name[] = "reduce_sum_0.tmp_0";

// for diff: 154, for speed 111
constexpr int num_slots = 154;

struct OneSlotInBatch {
  std::string name;
  std::vector<std::vector<float>> data;
  std::vector<int> shape;
  std::vector<size_t> lod;
};

struct DataRecord {
  std::vector<std::vector<OneSlotInBatch>> batched_data;
  std::map<std::string, std::vector<std::vector<float>>> datasets;
  size_t batch_iter{0}, num_samples;  // total number of samples

  DataRecord() = default;
  explicit DataRecord(const std::string &path, int batch_size = 1) {
    Load(path);
    Prepare(batch_size);
  }

  void Load(const std::string &path) {
    std::ifstream file(path);
    std::string line;
    int num_lines = 0;
    while (std::getline(file, line)) {
      num_lines++;
      std::vector<std::string> data;
      split(line, '\t', &data);
      std::vector<float> slot_data;
      split_to_float(data[1], ' ', &slot_data);
      std::string name = data[0];
      PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL,
                        "line %d, %s should be divisible", num_lines, name);
      datasets[name].emplace_back(std::move(slot_data));
    }
    num_samples = num_lines / num_slots;
    PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines),
                      "num samples should be divisible");
    PADDLE_ENFORCE_GT(num_samples, 0UL);
  }

  void Prepare(int bs) {
    for (auto it = datasets.begin(); it != datasets.end(); ++it) {
      PADDLE_ENFORCE_EQ(it->second.size(), num_samples,
                        "size of each slot should be equal");
    }
    size_t num_batches = num_samples / bs;
    EXPECT_GT(num_batches, 0);
    batched_data.resize(num_batches);
    for (auto &one_batch : batched_data) {
      one_batch.resize(datasets.size());
      size_t i = 0;
      for (auto it = datasets.begin(); it != datasets.end(); ++it) {
        auto &slot = one_batch[i];
        slot.name = it->first;
        slot.data.resize(bs);
        slot.lod.resize(bs + 1);
        slot.lod[0] = 0;
        auto &lod = slot.lod;
        auto &datas = it->second;
        for (int k = 0; k < bs; ++k) {
          size_t id = k + batch_iter * bs;
          std::copy(datas[id].begin(), datas[id].end(),
                    std::back_inserter(slot.data[k]));
          size_t len = datas[id].size() / 11;
          PADDLE_ENFORCE_EQ(len * 11, datas[id].size(),
                            "%s %d size should be divisible", slot.name, id);
          lod[k + 1] = lod[k] + len;
        }
        slot.shape.assign({static_cast<int>(lod[bs]), 11});
        i++;
      }
    }
  }

  const std::vector<OneSlotInBatch> &NextBatch() {
    if (batch_iter >= batched_data.size() - 1) {
      batch_iter = -1;
    }
    return batched_data[++batch_iter];
  }
};

static void TensorAssignSlot(PaddleTensor *tensor, const OneSlotInBatch &slot) {
  tensor->name = slot.name + "_embed";
  tensor->shape = slot.shape;
  tensor->dtype = PaddleDType::FLOAT32;
  tensor->lod.clear();
  tensor->lod.emplace_back(slot.lod);
  TensorAssignData(tensor, slot.data);
}

void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data) {
  const auto &one_batch = data->NextBatch();
  input_slots->resize(one_batch.size());
  for (size_t i = 0; i < one_batch.size(); ++i) {
    auto &slot = one_batch[i];
    TensorAssignSlot(&((*input_slots)[i]), slot);
  }
}

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
  std::vector<PaddleTensor> input_slots;
  int epoch = FLAGS_test_all_data ? data.batched_data.size() : 1;
  LOG(INFO) << "number of samples: "
            << data.batched_data.size() * FLAGS_batch_size;
  for (int bid = 0; bid < epoch; ++bid) {
    PrepareInputs(&input_slots, &data);
    (*inputs).emplace_back(input_slots);
  }
}

void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {
  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
  cfg->DisableGpu();
  cfg->SwitchSpecifyInputNames();
  cfg->SwitchIrDebug();
  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
  if (FLAGS_zero_copy) {
    cfg->SwitchUseFeedFetchOps(false);
  }
  if (use_mkldnn) {
    cfg->EnableMKLDNN();
    cfg->pass_builder()->AppendPass("fc_mkldnn_pass");
  }
  // Enable seqpool_concat_fuse_pass, disabled by default since it takes much
  // time
  cfg->pass_builder()->InsertPass(2, "seqpool_concat_fuse_pass");
}

void profile(bool use_mkldnn = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg, use_mkldnn);

  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                 input_slots_all, &outputs, FLAGS_num_threads);
}

TEST(Analyzer_seq_pool1, profile) { profile(); }

// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_seq_pool1, compare) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareNativeAndAnalysis(
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
}

// Compare Deterministic result
TEST(Analyzer_seq_pool1, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                       input_slots_all);
}

// Check the fuse status
TEST(Analyzer_seq_pool1, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  int num_ops;
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
  ASSERT_TRUE(fuse_statis.count("fc_fuse"));
  ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse"));
  ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse"));
  ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse"));
  ASSERT_EQ(fuse_statis.at("fc_fuse"), 10);
  EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2);
  EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 2);
  EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);
  LOG(INFO) << "num_ops: " << num_ops;
  EXPECT_EQ(num_ops, 171);
}

// Compare result of AnalysisConfig and AnalysisConfig + ZeroCopy
TEST(Analyzer_seq_pool1, compare_zero_copy) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);
  std::vector<std::string> outputs_name;
  outputs_name.emplace_back(out_var_name);
  CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
                             input_slots_all, outputs_name);
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
add seq pool inference test test=develop 6 years ago			`/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.`

			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License. */`

add test data for seqpool1 6 years ago			`#include <algorithm>`
add seq pool inference test test=develop 6 years ago			`#include <fstream>`
			`#include <iostream>`
			`#include "paddle/fluid/inference/tests/api/tester_helper.h"`

			`namespace paddle {`
			`namespace inference {`
			`namespace analysis {`

fix fuse square mat order and refine test test=develop 6 years ago			`// diff: similarity_norm.tmp_0, for speed: fc_4.tmp_1`
			`static const char out_var_name[] = "reduce_sum_0.tmp_0";`

			`// for diff: 154, for speed 111`
			`constexpr int num_slots = 154;`

add test data for seqpool1 6 years ago			`struct OneSlotInBatch {`
			`std::string name;`
			`std::vector<std::vector<float>> data;`
			`std::vector<int> shape;`
			`std::vector<size_t> lod;`
			`};`

			`struct DataRecord {`
			`std::vector<std::vector<OneSlotInBatch>> batched_data;`
			`std::map<std::string, std::vector<std::vector<float>>> datasets;`
			`size_t batch_iter{0}, num_samples; // total number of samples`

			`DataRecord() = default;`
			`explicit DataRecord(const std::string &path, int batch_size = 1) {`
			`Load(path);`
			`Prepare(batch_size);`
			`}`

			`void Load(const std::string &path) {`
			`std::ifstream file(path);`
			`std::string line;`
			`int num_lines = 0;`
			`while (std::getline(file, line)) {`
			`num_lines++;`
			`std::vector<std::string> data;`
			`split(line, '\t', &data);`
			`std::vector<float> slot_data;`
			`split_to_float(data[1], ' ', &slot_data);`
			`std::string name = data[0];`
fix warnings (#15790) * fix warnings test=develop * fix enforce test test=develop 6 years ago			`PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL,`
add test data for seqpool1 6 years ago			`"line %d, %s should be divisible", num_lines, name);`
			`datasets[name].emplace_back(std::move(slot_data));`
			`}`
			`num_samples = num_lines / num_slots;`
			`PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines),`
			`"num samples should be divisible");`
fix warnings (#15790) * fix warnings test=develop * fix enforce test test=develop 6 years ago			`PADDLE_ENFORCE_GT(num_samples, 0UL);`
add test data for seqpool1 6 years ago			`}`

			`void Prepare(int bs) {`
			`for (auto it = datasets.begin(); it != datasets.end(); ++it) {`
			`PADDLE_ENFORCE_EQ(it->second.size(), num_samples,`
			`"size of each slot should be equal");`
			`}`
			`size_t num_batches = num_samples / bs;`
			`EXPECT_GT(num_batches, 0);`
			`batched_data.resize(num_batches);`
			`for (auto &one_batch : batched_data) {`
			`one_batch.resize(datasets.size());`
			`size_t i = 0;`
			`for (auto it = datasets.begin(); it != datasets.end(); ++it) {`
			`auto &slot = one_batch[i];`
			`slot.name = it->first;`
			`slot.data.resize(bs);`
			`slot.lod.resize(bs + 1);`
			`slot.lod[0] = 0;`
			`auto &lod = slot.lod;`
			`auto &datas = it->second;`
			`for (int k = 0; k < bs; ++k) {`
			`size_t id = k + batch_iter * bs;`
			`std::copy(datas[id].begin(), datas[id].end(),`
			`std::back_inserter(slot.data[k]));`
			`size_t len = datas[id].size() / 11;`
			`PADDLE_ENFORCE_EQ(len * 11, datas[id].size(),`
			`"%s %d size should be divisible", slot.name, id);`
			`lod[k + 1] = lod[k] + len;`
			`}`
			`slot.shape.assign({static_cast<int>(lod[bs]), 11});`
			`i++;`
			`}`
			`}`
			`}`

			`const std::vector<OneSlotInBatch> &NextBatch() {`
			`if (batch_iter >= batched_data.size() - 1) {`
			`batch_iter = -1;`
			`}`
			`return batched_data[++batch_iter];`
			`}`
			`};`

			`static void TensorAssignSlot(PaddleTensor *tensor, const OneSlotInBatch &slot) {`
			`tensor->name = slot.name + "_embed";`
			`tensor->shape = slot.shape;`
			`tensor->dtype = PaddleDType::FLOAT32;`
			`tensor->lod.clear();`
			`tensor->lod.emplace_back(slot.lod);`
			`TensorAssignData(tensor, slot.data);`
			`}`

			`void PrepareInputs(std::vector<PaddleTensor> input_slots, DataRecord data) {`
			`const auto &one_batch = data->NextBatch();`
			`input_slots->resize(one_batch.size());`
			`for (size_t i = 0; i < one_batch.size(); ++i) {`
			`auto &slot = one_batch[i];`
			`TensorAssignSlot(&((*input_slots)[i]), slot);`
			`}`
			`}`

add seq pool inference test test=develop 6 years ago			`void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {`
add test data for seqpool1 6 years ago			`DataRecord data(FLAGS_infer_data, FLAGS_batch_size);`
			`std::vector<PaddleTensor> input_slots;`
			`int epoch = FLAGS_test_all_data ? data.batched_data.size() : 1;`
			`LOG(INFO) << "number of samples: "`
			`<< data.batched_data.size() * FLAGS_batch_size;`
			`for (int bid = 0; bid < epoch; ++bid) {`
			`PrepareInputs(&input_slots, &data);`
			`(*inputs).emplace_back(input_slots);`
			`}`
add seq pool inference test test=develop 6 years ago			`}`

add zerocopy for seqpool test 6 years ago			`void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {`
			`cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");`
			`cfg->DisableGpu();`
			`cfg->SwitchSpecifyInputNames();`
fix ir debug config (#15571) 6 years ago			`cfg->SwitchIrDebug();`
add zerocopy for seqpool test 6 years ago			`cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);`
simplify the zero_copy tests test=develop 6 years ago			`if (FLAGS_zero_copy) {`
			`cfg->SwitchUseFeedFetchOps(false);`
			`}`
add zerocopy for seqpool test 6 years ago			`if (use_mkldnn) {`
			`cfg->EnableMKLDNN();`
[MKL-DNN] Add Fully Connected Op for inference only(#15226) * fuse mul and elementwise add to fc * Reimplement the FC forward operator * Fix FC MKLDNN integration by transposing weights * Add FC MKLDNN Pass test=develop * FC MKLDNN Pass: change memcpy to std::copy * Fix MKLDNN FC handling of mismatch input and weights dims * Lower tolerance for MKL-DNN in resnet50 test test=develop * Adjust FC to support MKLDNN Op placement test=develop * Adjust Placement Op to set use_mkldnn attribute for graph test=develop * MKLDNN FC: fix weights format so that gemm version is called test=develop * FC MKLDNN: Remove tolerance decrease from tester_helper * FC MKL-DNN: Refactor the code, change input reorder to weight reorder * MKL-DNN FC: Introduce operator caching test=develop * FC MKL-DNN: Fix the tensor type in ExpectedKernelType test=develop * FC MKL-DNN: fix style changes test=develop * FC MKL-DNN: fallback to native on non-supported dim sizes test=develop * FC MKLDNN: fix CMake paths test=develop * FC MKLDNN: Refine placement pass graph mkldnn attribute test=develop * Fix Transpiler error for fuse_conv_eltwise test=develop * Fix missing STL includes in files test=develop * FC MKL-DNN: Enable new output size computation Also, refine pass to comply with newest interface. test=develop * FC MKL-DNN: enable only when fc_mkldnn_pass is enabled * FC MKL-DNN: Allow Weights to use oi or io format * FC MKL-DNN: Adjust UT to work with correct dims test=develop * Enable MKL DEBUG for resnet50 analyzer test=develop * FC MKL-DNN: Improve Hashing function test=develop * FC MKL-DNN: Fix shape for fc weights in transpiler * FC MKL-DNN: Update input pointer in re-used fc primitive * Add log for not handling fc fuse for unsupported dims test=develop * FC MKL-DNN: Move transpose from pass to Op Kernel test=develop * FC MKL-DNN: Disable transpose in unit test test=develop * FC MKL-DNN: Remove fc_mkldnn_pass from default list * Correct Flag for fake data analyzer tests test=develop * FC MKL-DNN: Add comment about fc mkldnn pass disablement test=develop * FC MKL-DNN: Disable fc in int8 tests test=develop 6 years ago			`cfg->pass_builder()->AppendPass("fc_mkldnn_pass");`
add zerocopy for seqpool test 6 years ago			`}`
disable seqpool concat pass by default saving CI time test=develop 6 years ago			`// Enable seqpool_concat_fuse_pass, disabled by default since it takes much`
			`// time`
			`cfg->pass_builder()->InsertPass(2, "seqpool_concat_fuse_pass");`
add zerocopy for seqpool test 6 years ago			`}`

add seq pool inference test test=develop 6 years ago			`void profile(bool use_mkldnn = false) {`
			`AnalysisConfig cfg;`
add zerocopy for seqpool test 6 years ago			`SetConfig(&cfg, use_mkldnn);`
add seq pool inference test test=develop 6 years ago
fix dataset reading and add support for full dataset (#16559) 6 years ago			`std::vector<std::vector<PaddleTensor>> outputs;`
add seq pool inference test test=develop 6 years ago			`std::vector<std::vector<PaddleTensor>> input_slots_all;`
			`SetInput(&input_slots_all);`
			`TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),`
			`input_slots_all, &outputs, FLAGS_num_threads);`
			`}`

			`TEST(Analyzer_seq_pool1, profile) { profile(); }`

add test data for seqpool1 6 years ago			`// Compare result of NativeConfig and AnalysisConfig`
			`TEST(Analyzer_seq_pool1, compare) {`
			`AnalysisConfig cfg;`
			`SetConfig(&cfg);`

			`std::vector<std::vector<PaddleTensor>> input_slots_all;`
			`SetInput(&input_slots_all);`
			`CompareNativeAndAnalysis(`
			`reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);`
			`}`

add compare_determine of seqpool1 test test=develop 6 years ago			`// Compare Deterministic result`
			`TEST(Analyzer_seq_pool1, compare_determine) {`
			`AnalysisConfig cfg;`
			`SetConfig(&cfg);`

			`std::vector<std::vector<PaddleTensor>> input_slots_all;`
			`SetInput(&input_slots_all);`
			`CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),`
			`input_slots_all);`
			`}`

simplify the zero_copy tests test=develop 6 years ago			`// Check the fuse status`
			`TEST(Analyzer_seq_pool1, fuse_statis) {`
add seq pool inference test test=develop 6 years ago			`AnalysisConfig cfg;`
			`SetConfig(&cfg);`
			`int num_ops;`
			`auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);`
add compare zerocopy test with native result test=develop 6 years ago			`auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);`
add zerocopy for seqpool test 6 years ago			`ASSERT_TRUE(fuse_statis.count("fc_fuse"));`
add seqpool concat fuse pass test=develop 6 years ago			`ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse"));`
fix fuse square mat order and refine test test=develop 6 years ago			`ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse"));`
			`ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse"));`
			`ASSERT_EQ(fuse_statis.at("fc_fuse"), 10);`
add seqpool concat fuse pass test=develop 6 years ago			`EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2);`
fix fuse square mat order and refine test test=develop 6 years ago			`EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 2);`
			`EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);`
add seq pool inference test test=develop 6 years ago			`LOG(INFO) << "num_ops: " << num_ops;`
fix fuse square mat order and refine test test=develop 6 years ago			`EXPECT_EQ(num_ops, 171);`
add seq pool inference test test=develop 6 years ago			`}`

simplify the zero_copy tests test=develop 6 years ago			`// Compare result of AnalysisConfig and AnalysisConfig + ZeroCopy`
			`TEST(Analyzer_seq_pool1, compare_zero_copy) {`
			`AnalysisConfig cfg;`
			`SetConfig(&cfg);`
add compare zerocopy test with native result test=develop 6 years ago
			`std::vector<std::vector<PaddleTensor>> input_slots_all;`
			`SetInput(&input_slots_all);`
simplify the zero_copy tests test=develop 6 years ago			`std::vector<std::string> outputs_name;`
			`outputs_name.emplace_back(out_var_name);`
			`CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),`
			`input_slots_all, outputs_name);`
add zerocopy for seqpool test 6 years ago			`}`

add seq pool inference test test=develop 6 years ago			`} // namespace analysis`
			`} // namespace inference`
			`} // namespace paddle`