You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc

229 lines
7.5 KiB

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
namespace analysis {
// diff: similarity_norm.tmp_0, for speed: fc_4.tmp_1
static const char out_var_name[] = "reduce_sum_0.tmp_0";
// for diff: 154, for speed 111
constexpr int num_slots = 154;
struct OneSlotInBatch {
std::string name;
std::vector<std::vector<float>> data;
std::vector<int> shape;
std::vector<size_t> lod;
};
struct DataRecord {
std::vector<std::vector<OneSlotInBatch>> batched_data;
std::map<std::string, std::vector<std::vector<float>>> datasets;
size_t batch_iter{0}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) {
Load(path);
Prepare(batch_size);
}
void Load(const std::string &path) {
std::ifstream file(path);
std::string line;
int num_lines = 0;
while (std::getline(file, line)) {
num_lines++;
std::vector<std::string> data;
split(line, '\t', &data);
std::vector<float> slot_data;
split_to_float(data[1], ' ', &slot_data);
std::string name = data[0];
PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0UL,
"line %d, %s should be divisible", num_lines, name);
datasets[name].emplace_back(std::move(slot_data));
}
num_samples = num_lines / num_slots;
PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines),
"num samples should be divisible");
PADDLE_ENFORCE_GT(num_samples, 0UL);
}
void Prepare(int bs) {
for (auto it = datasets.begin(); it != datasets.end(); ++it) {
PADDLE_ENFORCE_EQ(it->second.size(), num_samples,
"size of each slot should be equal");
}
size_t num_batches = num_samples / bs;
EXPECT_GT(num_batches, 0);
batched_data.resize(num_batches);
for (auto &one_batch : batched_data) {
one_batch.resize(datasets.size());
size_t i = 0;
for (auto it = datasets.begin(); it != datasets.end(); ++it) {
auto &slot = one_batch[i];
slot.name = it->first;
slot.data.resize(bs);
slot.lod.resize(bs + 1);
slot.lod[0] = 0;
auto &lod = slot.lod;
auto &datas = it->second;
for (int k = 0; k < bs; ++k) {
size_t id = k + batch_iter * bs;
std::copy(datas[id].begin(), datas[id].end(),
std::back_inserter(slot.data[k]));
size_t len = datas[id].size() / 11;
PADDLE_ENFORCE_EQ(len * 11, datas[id].size(),
"%s %d size should be divisible", slot.name, id);
lod[k + 1] = lod[k] + len;
}
slot.shape.assign({static_cast<int>(lod[bs]), 11});
i++;
}
}
}
const std::vector<OneSlotInBatch> &NextBatch() {
if (batch_iter >= batched_data.size() - 1) {
batch_iter = -1;
}
return batched_data[++batch_iter];
}
};
static void TensorAssignSlot(PaddleTensor *tensor, const OneSlotInBatch &slot) {
tensor->name = slot.name + "_embed";
tensor->shape = slot.shape;
tensor->dtype = PaddleDType::FLOAT32;
tensor->lod.clear();
tensor->lod.emplace_back(slot.lod);
TensorAssignData(tensor, slot.data);
}
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data) {
const auto &one_batch = data->NextBatch();
input_slots->resize(one_batch.size());
for (size_t i = 0; i < one_batch.size(); ++i) {
auto &slot = one_batch[i];
TensorAssignSlot(&((*input_slots)[i]), slot);
}
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
std::vector<PaddleTensor> input_slots;
int epoch = FLAGS_test_all_data ? data.batched_data.size() : 1;
LOG(INFO) << "number of samples: "
<< data.batched_data.size() * FLAGS_batch_size;
for (int bid = 0; bid < epoch; ++bid) {
PrepareInputs(&input_slots, &data);
(*inputs).emplace_back(input_slots);
}
}
void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {
cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
cfg->DisableGpu();
cfg->SwitchSpecifyInputNames();
cfg->SwitchIrDebug();
cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
if (FLAGS_zero_copy) {
cfg->SwitchUseFeedFetchOps(false);
}
if (use_mkldnn) {
cfg->EnableMKLDNN();
[MKL-DNN] Add Fully Connected Op for inference only(#15226) * fuse mul and elementwise add to fc * Reimplement the FC forward operator * Fix FC MKLDNN integration by transposing weights * Add FC MKLDNN Pass test=develop * FC MKLDNN Pass: change memcpy to std::copy * Fix MKLDNN FC handling of mismatch input and weights dims * Lower tolerance for MKL-DNN in resnet50 test test=develop * Adjust FC to support MKLDNN Op placement test=develop * Adjust Placement Op to set use_mkldnn attribute for graph test=develop * MKLDNN FC: fix weights format so that gemm version is called test=develop * FC MKLDNN: Remove tolerance decrease from tester_helper * FC MKL-DNN: Refactor the code, change input reorder to weight reorder * MKL-DNN FC: Introduce operator caching test=develop * FC MKL-DNN: Fix the tensor type in ExpectedKernelType test=develop * FC MKL-DNN: fix style changes test=develop * FC MKL-DNN: fallback to native on non-supported dim sizes test=develop * FC MKLDNN: fix CMake paths test=develop * FC MKLDNN: Refine placement pass graph mkldnn attribute test=develop * Fix Transpiler error for fuse_conv_eltwise test=develop * Fix missing STL includes in files test=develop * FC MKL-DNN: Enable new output size computation Also, refine pass to comply with newest interface. test=develop * FC MKL-DNN: enable only when fc_mkldnn_pass is enabled * FC MKL-DNN: Allow Weights to use oi or io format * FC MKL-DNN: Adjust UT to work with correct dims test=develop * Enable MKL DEBUG for resnet50 analyzer test=develop * FC MKL-DNN: Improve Hashing function test=develop * FC MKL-DNN: Fix shape for fc weights in transpiler * FC MKL-DNN: Update input pointer in re-used fc primitive * Add log for not handling fc fuse for unsupported dims test=develop * FC MKL-DNN: Move transpose from pass to Op Kernel test=develop * FC MKL-DNN: Disable transpose in unit test test=develop * FC MKL-DNN: Remove fc_mkldnn_pass from default list * Correct Flag for fake data analyzer tests test=develop * FC MKL-DNN: Add comment about fc mkldnn pass disablement test=develop * FC MKL-DNN: Disable fc in int8 tests test=develop
6 years ago
cfg->pass_builder()->AppendPass("fc_mkldnn_pass");
}
// Enable seqpool_concat_fuse_pass, disabled by default since it takes much
// time
cfg->pass_builder()->InsertPass(2, "seqpool_concat_fuse_pass");
}
void profile(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg, use_mkldnn);
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
input_slots_all, &outputs, FLAGS_num_threads);
}
TEST(Analyzer_seq_pool1, profile) { profile(); }
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_seq_pool1, compare) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
}
// Compare Deterministic result
TEST(Analyzer_seq_pool1, compare_determine) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
input_slots_all);
}
// Check the fuse status
TEST(Analyzer_seq_pool1, fuse_statis) {
AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops);
ASSERT_TRUE(fuse_statis.count("fc_fuse"));
ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse"));
ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse"));
ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse"));
ASSERT_EQ(fuse_statis.at("fc_fuse"), 10);
EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2);
EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 2);
EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2);
LOG(INFO) << "num_ops: " << num_ops;
EXPECT_EQ(num_ops, 171);
}
// Compare result of AnalysisConfig and AnalysisConfig + ZeroCopy
TEST(Analyzer_seq_pool1, compare_zero_copy) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
std::vector<std::string> outputs_name;
outputs_name.emplace_back(out_var_name);
CompareAnalysisAndZeroCopy(reinterpret_cast<PaddlePredictor::Config *>(&cfg),
input_slots_all, outputs_name);
}
} // namespace analysis
} // namespace inference
} // namespace paddle