fix dataset reading and add support for full dataset (#16559)

revert-16555-model_data_cryption_link_all_lib
Wojciech Uss 6 years ago committed by Tao Luo
parent 220190d5d2
commit 9b6a029666

@ -27,6 +27,7 @@
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
@ -266,17 +267,17 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
}
static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double latency, int epoch = 1) {
LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
<< ", threads: " << num_threads << ", thread id: " << tid
<< ", latency: " << latency << "ms, fps: " << 1 / (latency / 1000.f)
double batch_latency, int epoch = 1) {
PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size.");
double sample_latency = batch_latency / batch_size;
LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
<< " ======";
if (epoch > 1) {
int samples = batch_size * epoch;
LOG(INFO) << "====== sample number: " << samples
<< ", average latency of each sample: " << latency / samples
<< "ms ======";
}
LOG(INFO) << "====== batch_size: " << batch_size << ", iterations: " << epoch
<< ", repetitions: " << repeat << " ======";
LOG(INFO) << "====== batch latency: " << batch_latency
<< "ms, number of samples: " << batch_size * epoch
<< ", sample latency: " << sample_latency
<< "ms, fps: " << 1000.f / sample_latency << " ======";
}
static bool IsFileExists(const std::string &path) {

@ -26,7 +26,11 @@ endfunction()
# Registers an int8 analysis API test through inference_analysis_test.
#   target    - name of the test target
#   model_dir - directory containing the `model` passed via --infer_model
#   data_dir  - directory containing `data.bin` passed via --infer_data
#   filename  - source file of the tester
# The warmup batch size, batch size and iteration count are fixed here.
function(inference_analysis_api_int8_test target model_dir data_dir filename)
    inference_analysis_test(${target} SRCS ${filename}
        EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark
        ARGS --infer_model=${model_dir}/model
             --infer_data=${data_dir}/data.bin
             --warmup_batch_size=100
             --batch_size=50
             --iterations=2)
endfunction()
function(inference_analysis_api_test_with_fake_data target install_dir filename model_name)
@ -146,22 +150,22 @@ inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_con
# int8 image classification tests
if(WITH_MKLDNN)
    # Dataset directory; the archive is fetched only when the directory is absent.
    set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
    if (NOT EXISTS ${INT8_DATA_DIR})
        inference_download_and_uncompress(${INT8_DATA_DIR} "${INFERENCE_URL}/int8" "imagenet_val_100_tail.tar.gz")
    endif()

    # NOTE(review): SERIAL is passed below as a fifth argument, while
    # inference_analysis_api_int8_test declares four parameters - confirm it is
    # consumed as intended.

    #resnet50 int8
    set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50")
    if (NOT EXISTS ${INT8_RESNET50_MODEL_DIR})
        inference_download_and_uncompress(${INT8_RESNET50_MODEL_DIR} "${INFERENCE_URL}/int8" "resnet50_int8_model.tar.gz" )
    endif()
    inference_analysis_api_int8_test(test_analyzer_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL)

    #mobilenet int8
    set(INT8_MOBILENET_MODEL_DIR "${INT8_DATA_DIR}/mobilenet")
    if (NOT EXISTS ${INT8_MOBILENET_MODEL_DIR})
        inference_download_and_uncompress(${INT8_MOBILENET_MODEL_DIR} "${INFERENCE_URL}/int8" "mobilenetv1_int8_model.tar.gz" )
    endif()
    inference_analysis_api_int8_test(test_analyzer_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${INT8_DATA_DIR} analyzer_int8_image_classification_tester.cc SERIAL)
endif()

@ -154,7 +154,7 @@ void profile(bool use_mkldnn = false) {
config.EnableMKLDNN();
}
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> inputs;
LoadInputData(&inputs);
TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),

@ -197,7 +197,7 @@ void profile(bool use_mkldnn = false) {
cfg.SetMKLDNNOp(op_list);
}
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -206,9 +206,11 @@ void profile(bool use_mkldnn = false) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
PADDLE_ENFORCE_GT(outputs.size(), 0);
size_t size = GetSize(outputs[0]);
auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0);
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
float *result = static_cast<float *>(outputs[0].data.data());
float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3);
}

@ -17,8 +17,6 @@ limitations under the License. */
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_int32(iterations, 0, "Number of iterations");
namespace paddle {
namespace inference {
namespace analysis {
@ -30,8 +28,13 @@ void SetConfig(AnalysisConfig *cfg) {
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames(false);
cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
cfg->EnableMKLDNN();
cfg->pass_builder()->SetPasses(
{"infer_clean_graph_pass", "mkldnn_placement_pass",
"depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass",
"conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass",
"conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass",
"fc_fuse_pass", "is_test_pass"});
}
template <typename T>
@ -40,8 +43,8 @@ class TensorReader {
// Stores the stream reference, the starting offset, the tensor shape and the
// tensor name, and precomputes the element count as the product of all
// shape dimensions.
TensorReader(std::ifstream &file, size_t beginning_offset,
             std::vector<int> shape, std::string name)
    : file_(file), position(beginning_offset), shape_(shape), name_(name) {
  // std::accumulate uses the type of the initial value as its accumulator, so
  // the product is seeded with size_t{1} and multiplied as size_t rather than
  // through an int initial value and the element type T.
  numel = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
                          std::multiplies<size_t>());
}
PaddleTensor NextBatch() {
@ -71,10 +74,14 @@ class TensorReader {
};
std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
const std::vector<std::vector<PaddleTensor>> &test_data, int num_images) {
const std::vector<std::vector<PaddleTensor>> &test_data,
int num_images = FLAGS_warmup_batch_size) {
int test_data_batch_size = test_data[0][0].shape[0];
CHECK_LE(static_cast<size_t>(num_images),
test_data.size() * test_data_batch_size);
auto iterations_max = test_data.size();
PADDLE_ENFORCE(
static_cast<size_t>(num_images) <= iterations_max * test_data_batch_size,
"The requested quantization warmup data size " +
std::to_string(num_images) + " is bigger than all test data size.");
PaddleTensor images;
images.name = "input";
@ -120,20 +127,17 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
std::vector<int> image_batch_shape{batch_size, 3, 224, 224};
std::vector<int> label_batch_shape{batch_size, 1};
auto images_offset_in_file = static_cast<size_t>(file.tellg());
auto labels_offset_in_file =
static_cast<size_t>(file.tellg()) +
sizeof(float) * total_images *
std::accumulate(image_batch_shape.begin() + 1,
image_batch_shape.end(), 1, std::multiplies<int>());
images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;
TensorReader<float> image_reader(file, 0, image_batch_shape, "input");
TensorReader<float> image_reader(file, images_offset_in_file,
image_batch_shape, "input");
TensorReader<int64_t> label_reader(file, labels_offset_in_file,
label_batch_shape, "label");
auto iterations = total_images / batch_size;
if (FLAGS_iterations > 0 && FLAGS_iterations < iterations)
iterations = FLAGS_iterations;
for (auto i = 0; i < iterations; i++) {
auto iterations_max = total_images / batch_size;
for (auto i = 0; i < iterations_max; i++) {
auto images = image_reader.NextBatch();
auto labels = label_reader.NextBatch();
inputs->emplace_back(
@ -148,20 +152,21 @@ TEST(Analyzer_int8_resnet50, quantization) {
AnalysisConfig q_cfg;
SetConfig(&q_cfg);
// read data from file and prepare batches with test data
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all, 100);
SetInput(&input_slots_all);
// prepare warmup batch from input data read earlier
// warmup batch size can be different than batch size
std::shared_ptr<std::vector<PaddleTensor>> warmup_data =
GetWarmupData(input_slots_all, 100);
GetWarmupData(input_slots_all);
// configure quantizer
q_cfg.EnableMkldnnQuantizer();
q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(100);
q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);
CompareQuantizedAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
reinterpret_cast<const PaddlePredictor::Config *>(&q_cfg),
input_slots_all);
CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all);
}
} // namespace analysis

@ -124,7 +124,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_LAC, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -137,11 +137,13 @@ TEST(Analyzer_LAC, profile) {
24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25,
44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14,
15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23};
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
size_t size = GetSize(outputs[0]);
PADDLE_ENFORCE_GT(outputs.size(), 0);
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
size_t size = GetSize(output[0]);
size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
PADDLE_ENFORCE_GE(size, batch1_size);
int64_t *pdata = static_cast<int64_t *>(outputs[0].data.data());
int64_t *pdata = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < batch1_size; ++i) {
EXPECT_EQ(pdata[i], lac_ref_data[i]);
}

@ -96,7 +96,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void profile(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
if (use_mkldnn) {
cfg.EnableMKLDNN();
@ -108,8 +108,9 @@ void profile(bool use_mkldnn = false) {
input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
PADDLE_ENFORCE_EQ(outputs.size(), 2UL);
for (auto &output : outputs) {
PADDLE_ENFORCE_GT(outputs.size(), 0);
PADDLE_ENFORCE_EQ(outputs.back().size(), 2UL);
for (auto &output : outputs.back()) {
size_t size = GetSize(output);
PADDLE_ENFORCE_GT(size, 0);
float *result = static_cast<float *>(output.data.data());

@ -106,7 +106,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void profile(bool memory_load = false) {
AnalysisConfig cfg;
SetConfig(&cfg, memory_load);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -117,10 +117,12 @@ void profile(bool memory_load = false) {
// the first inference result
const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26,
48, 39, 38, 16, 25};
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
size_t size = GetSize(outputs[0]);
PADDLE_ENFORCE_GT(outputs.size(), 0);
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
int64_t *result = static_cast<int64_t *>(outputs[0].data.data());
int64_t *result = static_cast<int64_t *>(output[0].data.data());
for (size_t i = 0; i < std::min(11UL, size); i++) {
EXPECT_EQ(result[i], chinese_ner_result_data[i]);
}

@ -127,7 +127,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_Pyramid_DNN, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -135,10 +135,12 @@ TEST(Analyzer_Pyramid_DNN, profile) {
input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data && !FLAGS_zero_copy) {
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
size_t size = GetSize(outputs[0]);
PADDLE_ENFORCE_GT(outputs.size(), 0);
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
float *result = static_cast<float *>(outputs[0].data.data());
float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) {
EXPECT_GT(result[i], 0);

@ -40,7 +40,7 @@ void profile(bool use_mkldnn = false) {
if (use_mkldnn) {
cfg.EnableMKLDNN();
}
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);

@ -229,7 +229,7 @@ TEST(Analyzer_rnn1, profile) {
SetConfig(&cfg);
cfg.DisableGpu();
cfg.SwitchIrDebug();
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -280,7 +280,7 @@ TEST(Analyzer_rnn1, compare_determine) {
TEST(Analyzer_rnn1, multi_thread) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);

@ -126,7 +126,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_rnn2, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -136,9 +136,11 @@ TEST(Analyzer_rnn2, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result
PADDLE_ENFORCE_GT(outputs.size(), 0);
size_t size = GetSize(outputs[0]);
auto output = outputs.back();
PADDLE_ENFORCE_GT(output.size(), 0);
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
float *result = static_cast<float *>(outputs[0].data.data());
float *result = static_cast<float *>(output[0].data.data());
for (size_t i = 0; i < size; i++) {
EXPECT_NEAR(result[i], result_data[i], 1e-3);
}

@ -110,7 +110,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST(Analyzer_seq_conv1, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -119,10 +119,12 @@ TEST(Analyzer_seq_conv1, profile) {
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
size_t size = GetSize(outputs[0]);
PADDLE_ENFORCE_GT(outputs.size(), 0);
auto output = outputs.back();
PADDLE_ENFORCE_EQ(output.size(), 1UL);
size_t size = GetSize(output[0]);
PADDLE_ENFORCE_GT(size, 0);
float *result = static_cast<float *>(outputs[0].data.data());
float *result = static_cast<float *>(output[0].data.data());
// output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) {
EXPECT_GT(result[i], 0);

@ -156,7 +156,7 @@ void profile(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg, use_mkldnn);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),

@ -70,7 +70,7 @@ TEST(Analyzer_Text_Classification, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
cfg.SwitchIrDebug();
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -79,8 +79,9 @@ TEST(Analyzer_Text_Classification, profile) {
if (FLAGS_num_threads == 1) {
// Get output
LOG(INFO) << "get outputs " << outputs.size();
for (auto &output : outputs) {
PADDLE_ENFORCE_GT(outputs.size(), 0);
LOG(INFO) << "get outputs " << outputs.back().size();
for (auto &output : outputs.back()) {
LOG(INFO) << "output.shape: " << to_string(output.shape);
// no lod ?
CHECK_EQ(output.lod.size(), 0UL);

@ -186,7 +186,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void profile(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
if (use_mkldnn) {
cfg.EnableMKLDNN();
}

@ -87,7 +87,7 @@ void profile(bool use_mkldnn = false) {
cfg.EnableMKLDNN();
}
// cfg.pass_builder()->TurnOnDebug();
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
@ -100,7 +100,8 @@ void profile(bool use_mkldnn = false) {
auto refer = ProcessALine(line);
file.close();
auto &output = outputs.front();
PADDLE_ENFORCE_GT(outputs.size(), 0);
auto &output = outputs.back().front();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
CHECK_EQ(numel, refer.data.size());
for (size_t i = 0; i < numel; ++i) {

File diff suppressed because it is too large Load Diff

@ -74,7 +74,7 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
}
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> outputs;
if (use_analysis || use_tensorrt) {
AnalysisConfig config;
config.EnableUseGpu(100, 0);

Loading…
Cancel
Save