@@ -17,8 +17,6 @@ limitations under the License. */
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
 
-DEFINE_int32(iterations, 0, "Number of iterations");
-
 namespace paddle {
 namespace inference {
 namespace analysis {
@@ -30,8 +28,13 @@ void SetConfig(AnalysisConfig *cfg) {
   cfg->SwitchIrOptim();
   cfg->SwitchSpecifyInputNames(false);
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
-
   cfg->EnableMKLDNN();
+  cfg->pass_builder()->SetPasses(
+      {"infer_clean_graph_pass", "mkldnn_placement_pass",
+       "depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass",
+       "conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass",
+       "conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass",
+       "fc_fuse_pass", "is_test_pass"});
 }
 
 template <typename T>
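Note: SetPasses() pins the whole IR pipeline to the listed passes in order, so the test is insulated from changes to the default MKL-DNN pass list. A minimal standalone sketch of the same configuration step, assuming only the AnalysisConfig calls visible in this hunk; the model path and the shortened pass list are placeholders, not values from this test:

    #include "paddle/fluid/inference/api/paddle_analysis_config.h"

    // Pin the IR pass pipeline of a config, as SetConfig() above does.
    paddle::AnalysisConfig MakePinnedConfig() {
      paddle::AnalysisConfig cfg;
      cfg.SetModel("/path/to/model_dir");  // hypothetical path
      cfg.EnableMKLDNN();
      cfg.pass_builder()->SetPasses(
          {"infer_clean_graph_pass", "mkldnn_placement_pass",
           "conv_bn_fuse_pass", "is_test_pass"});  // shortened for brevity
      return cfg;
    }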
@@ -40,8 +43,8 @@ class TensorReader {
   TensorReader(std::ifstream &file, size_t beginning_offset,
                std::vector<int> shape, std::string name)
       : file_(file), position(beginning_offset), shape_(shape), name_(name) {
-    numel =
-        std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<T>());
+    numel = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
+                            std::multiplies<size_t>());
   }
 
   PaddleTensor NextBatch() {
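Note on the numel fix: std::accumulate deduces its accumulation type from the init argument, so the old plain `1` made the running product an int, and std::multiplies<T> additionally routed each partial product through the tensor's element type (float for the image reader). A self-contained illustration of the failure mode, using a synthetic shape chosen so the element count exceeds 2^24, beyond which float no longer represents every integer exactly:

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    int main() {
      // Synthetic shape: 1023 * 1023 * 17 = 17790993 elements, > 2^24.
      std::vector<int> shape{1023, 1023, 17};
      // Old pattern: int accumulator, partial products rounded through float.
      int old_numel = std::accumulate(shape.begin(), shape.end(), 1,
                                      std::multiplies<float>());
      // Fixed pattern: the whole computation stays in size_t.
      size_t new_numel = std::accumulate(shape.begin(), shape.end(), size_t{1},
                                         std::multiplies<size_t>());
      // Prints 17790992 vs 17790993 on typical IEEE-754 platforms.
      std::cout << old_numel << " vs " << new_numel << "\n";
    }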
@@ -71,10 +74,14 @@ class TensorReader {
 };
 
 std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
-    const std::vector<std::vector<PaddleTensor>> &test_data, int num_images) {
+    const std::vector<std::vector<PaddleTensor>> &test_data,
+    int num_images = FLAGS_warmup_batch_size) {
   int test_data_batch_size = test_data[0][0].shape[0];
-  CHECK_LE(static_cast<size_t>(num_images),
-           test_data.size() * test_data_batch_size);
+  auto iterations_max = test_data.size();
+  PADDLE_ENFORCE(
+      static_cast<size_t>(num_images) <= iterations_max * test_data_batch_size,
+      "The requested quantization warmup data size " +
+          std::to_string(num_images) + " is bigger than all test data size.");
 
   PaddleTensor images;
   images.name = "input";
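Note: the warmup set is carved out of batches that were already read, so it can never exceed test_data.size() * test_data_batch_size images; PADDLE_ENFORCE now reports that limit with an explanatory message instead of CHECK_LE's bare comparison. A plain-C++ sketch of the equivalent guard, with the same message as the hunk above:

    #include <cstddef>
    #include <stdexcept>
    #include <string>

    // Equivalent of the PADDLE_ENFORCE check above.
    void CheckWarmupSize(size_t num_images, size_t iterations_max,
                         size_t test_data_batch_size) {
      if (num_images > iterations_max * test_data_batch_size) {
        throw std::runtime_error(
            "The requested quantization warmup data size " +
            std::to_string(num_images) + " is bigger than all test data size.");
      }
    }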
@@ -120,20 +127,17 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
   std::vector<int> image_batch_shape{batch_size, 3, 224, 224};
   std::vector<int> label_batch_shape{batch_size, 1};
+  auto images_offset_in_file = static_cast<size_t>(file.tellg());
   auto labels_offset_in_file =
-      static_cast<size_t>(file.tellg()) +
-      sizeof(float) * total_images *
-          std::accumulate(image_batch_shape.begin() + 1,
-                          image_batch_shape.end(), 1, std::multiplies<int>());
+      images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;
 
-  TensorReader<float> image_reader(file, 0, image_batch_shape, "input");
+  TensorReader<float> image_reader(file, images_offset_in_file,
+                                   image_batch_shape, "input");
   TensorReader<int64_t> label_reader(file, labels_offset_in_file,
                                      label_batch_shape, "label");
 
-  auto iterations = total_images / batch_size;
-  if (FLAGS_iterations > 0 && FLAGS_iterations < iterations)
-    iterations = FLAGS_iterations;
+  auto iterations_max = total_images / batch_size;
 
-  for (auto i = 0; i < iterations; i++) {
+  for (auto i = 0; i < iterations_max; i++) {
     auto images = image_reader.NextBatch();
     auto labels = label_reader.NextBatch();
     inputs->emplace_back(
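Note on the offsets: the old label-offset arithmetic already assumed the images begin at the current read position (file.tellg(), i.e. after whatever header was consumed), yet the old image reader started at the hard-coded offset 0, so it appears to have read header bytes as pixel data. Recording tellg() once in images_offset_in_file fixes that and simplifies the label offset, since every image is 3 x 224 x 224 floats. A sketch of the implied [header][images][labels] layout arithmetic; the helper names are illustrative, not from the test:

    #include <cstddef>

    // Each image is 3 * 224 * 224 floats.
    constexpr size_t kImageBytes = sizeof(float) * 3 * 224 * 224;

    // Labels start immediately after all images.
    size_t LabelsOffset(size_t images_offset, size_t total_images) {
      return images_offset + total_images * kImageBytes;
    }

    // Where the i-th image batch begins, as TensorReader advances through it.
    size_t ImageBatchOffset(size_t images_offset, size_t i, size_t batch_size) {
      return images_offset + i * batch_size * kImageBytes;
    }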
@@ -148,20 +152,21 @@ TEST(Analyzer_int8_resnet50, quantization) {
   AnalysisConfig q_cfg;
   SetConfig(&q_cfg);
 
+  // read data from file and prepare batches with test data
   std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all, 100);
+  SetInput(&input_slots_all);
 
+  // prepare warmup batch from input data read earlier
+  // warmup batch size can be different than batch size
   std::shared_ptr<std::vector<PaddleTensor>> warmup_data =
-      GetWarmupData(input_slots_all, 100);
+      GetWarmupData(input_slots_all);
 
+  // configure quantizer
   q_cfg.EnableMkldnnQuantizer();
   q_cfg.mkldnn_quantizer_config()->SetWarmupData(warmup_data);
-  q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(100);
+  q_cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_warmup_batch_size);
 
-  CompareQuantizedAndAnalysis(
-      reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
-      reinterpret_cast<const PaddlePredictor::Config *>(&q_cfg),
-      input_slots_all);
+  CompareQuantizedAndAnalysis(&cfg, &q_cfg, input_slots_all);
 }
 
 }  // namespace analysis
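Note: dropping the reinterpret_casts at the call site implies that CompareQuantizedAndAnalysis in tester_helper.h now takes AnalysisConfig pointers directly. The updated declaration is presumably along these lines (it is not shown in this diff):

    #include <vector>
    #include "paddle/fluid/inference/api/paddle_analysis_config.h"

    // Presumed updated helper signature in tester_helper.h; AnalysisConfig
    // and PaddleTensor live in namespace paddle.
    void CompareQuantizedAndAnalysis(
        const paddle::AnalysisConfig *config,
        const paddle::AnalysisConfig *qconfig,
        const std::vector<std::vector<paddle::PaddleTensor>> &inputs);

This also removes the hard-coded 100: batch size, warmup size, and iteration count all come from flags (FLAGS_batch_size via SetInput's default, FLAGS_warmup_batch_size, FLAGS_iterations), so the test can be sized from the command line.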