Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_data_balance
commit 5b4f283069

@@ -0,0 +1,130 @@

# Python Data Feeding

In the existing implementation of Paddle Fluid, there are two ways to feed data:

- Use `reader_op` on the C++ backend side. This method only supports data feeding from recordio files and random data generators, but it supports many kinds of `decorated_readers`. For example, `double_buffer_reader` uses two threads to achieve better performance: one for time-consuming I/O operations, and the other for `Executor::Run()`. See [C++ Data Feeding](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/cpp_data_feeding.md) for details.

- Feed data directly using `DataFeeder.feed()` in Python code. This is more flexible than the first way: many kinds of preprocessing steps can be performed before feeding, in Python or any other language, instead of adding many uncommon `operators` on the C++ side. However, it is less efficient: the program cannot read the next mini-batch before `Executor::Run()` ends, and `decorated_readers` such as `double_buffer_reader` cannot be used to improve performance.

In this document, we design a Python Data Feeding process that combines the efficiency of the first way with the flexibility of the second. A data queue, `LoDTensorBlockingQueue`, is shared by the Python and C++ sides: the Python side pushes `LoDTensorArray` objects into the queue, while the `reader_op` on the C++ side reads data out of it.


## Design of LoDTensorBlockingQueue

`LoDTensorBlockingQueue` is a blocking queue with a fixed `capacity` that accepts `std::vector<framework::LoDTensor>` elements with shapes indicated by `dims`. Since `LoDTensorBlockingQueue` must be constructed with `capacity` and `dims`, it cannot be a `Variable` type. Therefore, a `LoDTensorBlockingQueueHolder` is designed to defer the construction of `LoDTensorBlockingQueue`.

```C++
class LoDTensorBlockingQueueHolder;

class LoDTensorBlockingQueue {
  friend class LoDTensorBlockingQueueHolder;

 private:
  // `LoDTensorBlockingQueue` can only be constructed by
  // `LoDTensorBlockingQueueHolder::InitOnce()`
  LoDTensorBlockingQueue(size_t capacity, const std::vector<framework::DDim>& dims);

 public:
  size_t Size() const { return queue_.Size(); }  // Get the current size of the queue

  size_t Cap() const { return queue_.Cap(); }  // Get the capacity of the queue

  void Close() { return queue_.Close(); }

  bool IsClosed() const { return queue_.IsClosed(); }

  // Block if Size() == Cap().
  // Return false only when queue_.IsClosed() == true
  bool Push(const std::vector<framework::LoDTensor>& lod_tensor_vec);

  // Block if Size() == 0.
  // *success is set to false only when queue_.IsClosed() == true
  std::vector<framework::LoDTensor> Pop(bool* success = nullptr);

 private:
  // Use reader::BlockingQueue as the inner data structure
  BlockingQueue<std::vector<framework::LoDTensor>> queue_;
  std::vector<framework::DDim> dims_;
};

class LoDTensorBlockingQueueHolder {
 public:
  // Call the constructor of `LoDTensorBlockingQueue` to create queue_.
  // `InitOnce` can only be called once; otherwise an exception is raised.
  void InitOnce(size_t capacity, const std::vector<framework::DDim>& dims) {
    PADDLE_ENFORCE(queue_ == nullptr);
    queue_.reset(new LoDTensorBlockingQueue(capacity, dims));
  }

  const std::shared_ptr<LoDTensorBlockingQueue>& GetQueue() const { return queue_; }

 private:
  std::shared_ptr<LoDTensorBlockingQueue> queue_;
};
```

There are several important points to note:
- `LoDTensorBlockingQueueHolder` should be a `Variable` in the global scope, so that `reader_op` can find it when reading data.
- A `Variable` of type `LoDTensorBlockingQueueHolder` (not merely a `VarDesc`) must be created in Python code before `Executor::Run()`, so that `Executor::Run()` can find the feeding data when it is called.
- The reader-creation op (`CreatePyReaderOp`, described below) should accept the name of the `LoDTensorBlockingQueueHolder` variable as an input.


## Release of the GIL in pybind

`pybind11::gil_scoped_release` is used to release the GIL (Global Interpreter Lock) when `LoDTensorBlockingQueue::Push()` or `Executor::Run()` is invoked from the Python side, so that `LoDTensorBlockingQueue::Push()` and `Executor::Run()` can run in parallel.
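
For illustration only (the actual binding code is not part of this document, and the names below are assumptions), the `Push()` binding could release the GIL with `pybind11`'s `call_guard`, roughly as follows:

```C++
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

namespace py = pybind11;

// Hypothetical binding sketch: `LoDTensorBlockingQueue` and
// `framework::LoDTensor` are assumed to be visible here, as in the
// class definition above.
void BindLoDTensorBlockingQueue(py::module& m) {
  py::class_<LoDTensorBlockingQueue, std::shared_ptr<LoDTensorBlockingQueue>>(
      m, "LoDTensorBlockingQueue")
      .def("push",
           [](LoDTensorBlockingQueue& self,
              const std::vector<framework::LoDTensor>& lod_tensor_vec) {
             return self.Push(lod_tensor_vec);
           },
           // Release the GIL while Push() may block, so other Python threads
           // (e.g. the one calling Executor::Run()) keep running.
           py::call_guard<py::gil_scoped_release>());
}
```

With the GIL released inside the blocking call, the Python thread feeding data and the thread executing `Executor::Run()` no longer serialize on the interpreter lock.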

## Design of PyReader

`PyReader` is a reader that holds a `LoDTensorBlockingQueue` object.

```C++
class PyReader : public ReaderBase {
 public:
  explicit PyReader(const std::shared_ptr<LoDTensorBlockingQueue>& queue);

  void ReadNext(std::vector<framework::LoDTensor>* out) override {
    bool success;
    *out = queue_->Pop(&success);
    if (!success) out->clear();  // queue closed and drained; return empty output
  }

  // Re-initialization is a no-op for PyReader.
  void ReInit() override { return; }

 private:
  std::shared_ptr<LoDTensorBlockingQueue> queue_;
};
```


## Design of CreatePyReaderOp

`CreatePyReaderOp` is used to create the `PyReader` object. It requires an input `blocking_queue`, which indicates the name of the `LoDTensorBlockingQueueHolder` variable.

```C++
class CreatePyReaderOp : public framework::OperatorBase {
 public:
  using framework::OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope& scope,
               const platform::Place& dev_place) const override {
    auto* out = scope.FindVar(Output("Out"))
                    ->template GetMutable<framework::ReaderHolder>();
    // The reader has already been created; nothing to do.
    if (out->Get() != nullptr) return;

    // Find the queue holder variable by the name given in `blocking_queue`.
    const std::string& queue_name = Input("blocking_queue");
    auto* queue_holder_var = scope.FindVar(queue_name);
    PADDLE_ENFORCE(queue_holder_var != nullptr);
    auto* queue_holder = queue_holder_var
        ->template GetMutable<framework::LoDTensorBlockingQueueHolder>();
    out->Reset(new PyReader(queue_holder->GetQueue()));
  }
};
```

## Design of Python code

The design of the Python code is as follows. First, we construct a variable of `LoDTensorBlockingQueueHolder` and initialize it with the given parameters, obtaining the `LoDTensorBlockingQueue` object after initialization. After that, a `CreatePyReaderOp` layer is constructed, which accepts the name of the `LoDTensorBlockingQueueHolder` variable. Both the `LoDTensorBlockingQueue` object and the output of the layer are returned.

```Python
def py_reader(capacity, shapes):
    queue_name = unique_name.generate("lod_tensor_blocking_queue")
    # create the LoDTensorBlockingQueueHolder Variable in the global scope
    var = global_scope().var(queue_name)
    # init the queue and obtain the LoDTensorBlockingQueue object
    feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, shapes)

    out = create_var()
    create_py_reader_op_with_queue_name(
        inputs={'blocking_queue': queue_name},
        outputs={'Out': [out]})

    return out, feed_queue
```
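
To make the intended usage concrete, the following sketch (illustrative only, not part of the design; `PrepareBatch` and `FeedAndConsume` are hypothetical names) shows, in C++ terms, the producer/consumer pattern the queue enables: one thread pushes mini-batches with `Push()` while another pops them with `Pop()`, just as the Python side and `PyReader::ReadNext()` do during `Executor::Run()`.

```C++
#include <memory>
#include <thread>
#include <vector>

// Hypothetical helper that builds one mini-batch; not part of the design.
std::vector<framework::LoDTensor> PrepareBatch(int step);

// Illustrative producer/consumer sketch built only on the queue API above.
// In the real design the producer is the Python side and the consumer is
// PyReader::ReadNext() called from Executor::Run().
void FeedAndConsume(const std::shared_ptr<LoDTensorBlockingQueue>& queue) {
  std::thread producer([queue]() {
    for (int i = 0; i < 100; ++i) {
      if (!queue->Push(PrepareBatch(i))) break;  // Push fails only when the queue is closed
    }
    queue->Close();  // signal that no more data will arrive
  });

  bool success = true;
  while (true) {
    std::vector<framework::LoDTensor> batch = queue->Pop(&success);
    if (!success) break;  // queue closed and drained
    // ... consume `batch`, e.g. one Executor::Run() step ...
  }
  producer.join();
}
```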

@@ -0,0 +1,36 @@

# Inference Demos

Input data format:

- Each line contains a single record.
- Each record's format is:

```
<space-separated floats as data>\t<space-separated ints as shape>
```

Follow the C++ code in `vis_demo.cc`.
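
For illustration only (the file name `data.txt` and the values below are made up, not part of the demos), a record file in this format could be generated like this:

```C++
#include <fstream>
#include <string>
#include <vector>

// Write one record per line: space-separated float data, a tab, then the
// space-separated integer shape. The shape must multiply to data.size().
int main() {
  std::ofstream out("data.txt");
  std::vector<float> data = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
  std::vector<int> shape = {1, 2, 3};

  std::string line;
  for (size_t i = 0; i < data.size(); ++i) {
    line += std::to_string(data[i]);
    if (i + 1 < data.size()) line += ' ';
  }
  line += '\t';
  for (size_t i = 0; i < shape.size(); ++i) {
    line += std::to_string(shape[i]);
    if (i + 1 < shape.size()) line += ' ';
  }
  out << line << "\n";
  return 0;
}
```

Each additional line appended in the same way becomes another record.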

## MobileNet

To execute the demo, simply run

```sh
./mobilenet_inference_demo --modeldir <model> --data <datafile>
```

## SE-ResNeXt-50

To execute the demo, simply run

```sh
./se_resnext50_inference_demo --modeldir <model> --data <datafile>
```

## OCR

To execute the demo, simply run

```sh
./ocr_inference_demo --modeldir <model> --data <datafile>
```

@@ -0,0 +1,68 @@

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <algorithm>  // std::min
#include <sstream>    // std::stringstream
#include <string>
#include <vector>

#include "paddle/contrib/inference/paddle_inference_api.h"

namespace paddle {
namespace demo {

// Split `str` by `sep` into `pieces`; a trailing empty piece is dropped.
static void split(const std::string& str,
                  char sep,
                  std::vector<std::string>* pieces) {
  pieces->clear();
  if (str.empty()) {
    return;
  }
  size_t pos = 0;
  size_t next = str.find(sep, pos);
  while (next != std::string::npos) {
    pieces->push_back(str.substr(pos, next - pos));
    pos = next + 1;
    next = str.find(sep, pos);
  }
  if (!str.substr(pos).empty()) {
    pieces->push_back(str.substr(pos));
  }
}

/*
 * Get a summary of a PaddleTensor content.
 */
static std::string SummaryTensor(const PaddleTensor& tensor) {
  std::stringstream ss;
  int num_elems = tensor.data.length() / PaddleDtypeSize(tensor.dtype);

  ss << "data[:10]\t";
  switch (tensor.dtype) {
    case PaddleDType::INT64: {
      for (int i = 0; i < std::min(num_elems, 10); i++) {
        ss << static_cast<int64_t*>(tensor.data.data())[i] << " ";
      }
      break;
    }
    case PaddleDType::FLOAT32:
      for (int i = 0; i < std::min(num_elems, 10); i++) {
        ss << static_cast<float*>(tensor.data.data())[i] << " ";
      }
      break;
  }
  return ss.str();
}

}  // namespace demo
}  // namespace paddle

@@ -0,0 +1,149 @@

/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

/*
 * This file contains demos for mobilenet, se-resnext50 and ocr.
 */

#include <gflags/gflags.h>
#include <glog/logging.h>  // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
#include <gtest/gtest.h>
#include <fstream>
#include <iostream>
#include "paddle/contrib/inference/demo/utils.h"
#include "paddle/contrib/inference/paddle_inference_api.h"

#ifdef PADDLE_WITH_CUDA
DECLARE_double(fraction_of_gpu_memory_to_use);
#endif

namespace paddle {
namespace demo {

DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(refer, "", "Path to the reference result for comparison.");
DEFINE_string(
    data,
    "",
    "Path of the data file; each line is a record whose format is "
    "'<space-separated floats as data>\t<space-separated ints as shape>'");

struct Record {
  std::vector<float> data;
  std::vector<int32_t> shape;
};

void split(const std::string& str, char sep, std::vector<std::string>* pieces);

// Parse one line of the data file into a Record: float data, a tab, then the shape.
Record ProcessALine(const std::string& line) {
  LOG(INFO) << "process a line";
  std::vector<std::string> columns;
  split(line, '\t', &columns);
  CHECK_EQ(columns.size(), 2UL)
      << "data format error, should be <data>\t<shape>";

  Record record;
  std::vector<std::string> data_strs;
  split(columns[0], ' ', &data_strs);
  for (auto& d : data_strs) {
    record.data.push_back(std::stof(d));
  }

  std::vector<std::string> shape_strs;
  split(columns[1], ' ', &shape_strs);
  for (auto& s : shape_strs) {
    record.shape.push_back(std::stoi(s));
  }
  LOG(INFO) << "data size " << record.data.size();
  LOG(INFO) << "data shape size " << record.shape.size();
  return record;
}

// Compare the predictor output against the first record in `referfile`.
void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
  std::string line;
  std::ifstream file(referfile);
  std::getline(file, line);
  auto refer = ProcessALine(line);
  file.close();

  size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
  LOG(INFO) << "predictor output numel " << numel;
  LOG(INFO) << "reference output numel " << refer.data.size();
  EXPECT_EQ(numel, refer.data.size());
  switch (output.dtype) {
    case PaddleDType::INT64: {
      for (size_t i = 0; i < numel; ++i) {
        EXPECT_EQ(static_cast<int64_t*>(output.data.data())[i], refer.data[i]);
      }
      break;
    }
    case PaddleDType::FLOAT32:
      for (size_t i = 0; i < numel; ++i) {
        EXPECT_NEAR(
            static_cast<float*>(output.data.data())[i], refer.data[i], 1e-5);
      }
      break;
  }
}

/*
 * Use the native fluid engine to run inference on the demo.
 */
void Main(bool use_gpu) {
  NativeConfig config;
  config.param_file = FLAGS_modeldir + "/__params__";
  config.prog_file = FLAGS_modeldir + "/__model__";
  config.use_gpu = use_gpu;
  config.device = 0;
#ifdef PADDLE_WITH_CUDA
  config.fraction_of_gpu_memory = FLAGS_fraction_of_gpu_memory_to_use;
#endif

  LOG(INFO) << "init predictor";
  auto predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);

  LOG(INFO) << "begin to process data";
  // Just a single batch of data.
  std::string line;
  std::ifstream file(FLAGS_data);
  std::getline(file, line);
  auto record = ProcessALine(line);
  file.close();

  // Inference.
  PaddleTensor input{
      .name = "xx",
      .shape = record.shape,
      .data = PaddleBuf(record.data.data(), record.data.size() * sizeof(float)),
      .dtype = PaddleDType::FLOAT32};

  LOG(INFO) << "run executor";
  std::vector<PaddleTensor> output;
  predictor->Run({input}, &output);

  LOG(INFO) << "output.size " << output.size();
  auto& tensor = output.front();
  LOG(INFO) << "output: " << SummaryTensor(tensor);

  // Compare with the reference result.
  CheckOutput(FLAGS_refer, tensor);
}

TEST(demo, vis_demo_cpu) { Main(false /*use_gpu*/); }
#ifdef PADDLE_WITH_CUDA
TEST(demo, vis_demo_gpu) { Main(true /*use_gpu*/); }
#endif
}  // namespace demo
}  // namespace paddle

@@ -0,0 +1,126 @@

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/contrib/inference/paddle_inference_api.h"
#include "paddle/contrib/inference/paddle_inference_api_impl.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/utils/singleton.h"

namespace paddle {

using inference::analysis::Argument;
using inference::Singleton;
using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;

class TensorRTSubgraphPredictor : public NativePaddlePredictor {
 public:
  explicit TensorRTSubgraphPredictor(const TensorRTConfig& config)
      : NativePaddlePredictor(config), config_(config) {}

  bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
    VLOG(3) << "Predictor::init()";

    if (config_.use_gpu) {
      place_ = paddle::platform::CUDAPlace(config_.device);
    } else {
      place_ = paddle::platform::CPUPlace();
    }
    if (parent_scope) {
      scope_ = parent_scope;
      sub_scope_ = &(parent_scope->NewScope());
    } else {
      paddle::framework::InitDevices(false);
      scope_.reset(new paddle::framework::Scope());
    }

    executor_.reset(new paddle::framework::Executor(place_));

    // Initialize the inference program.
    if (!config_.model_dir.empty()) {
      // Parameters are saved in separate files sited in
      // the specified `dirname`.
      inference_program_ = paddle::inference::Load(
          executor_.get(), scope_.get(), config_.model_dir);
    } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
      // All parameters are saved in a single file.
      // The file names should be consistent with that used
      // in Python API `fluid.io.save_inference_model`.
      inference_program_ = paddle::inference::Load(
          executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
    } else {
      LOG(ERROR) << "failed to load inference model.";
      return false;
    }

    // Analyze the inference_program.
    Argument argument;
    argument.origin_program_desc.reset(
        new ProgramDesc(*inference_program_->Proto()));
    Singleton<Analyzer>::Global().Run(&argument);
    CHECK(argument.transformed_program_desc);
    VLOG(5) << "transformed program:\n"
            << argument.transformed_program_desc->SerializeAsString();
    VLOG(5) << "to prepare executor";
    *inference_program_->Proto() = *argument.transformed_program_desc;
    ctx_ = executor_->Prepare(*inference_program_, 0);

    VLOG(5) << "to create variables";
    executor_->CreateVariables(
        *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);

    // Get the feed_target_names and fetch_target_names.
    feed_target_names_ = inference_program_->GetFeedTargetNames();
    fetch_target_names_ = inference_program_->GetFetchTargetNames();
    return true;
  }

 private:
  TensorRTConfig config_;
};

template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
    const TensorRTConfig& config) {
  VLOG(3) << "create TensorRTSubgraphPredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GT(
        config.fraction_of_gpu_memory,
        0.f,
        "fraction_of_gpu_memory in the config should be set in the range (0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    // Only forward the flag to gflags when the fraction is in a sensible range.
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummy");  // placeholder entry; the real flag follows
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         std::to_string(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(
      new TensorRTSubgraphPredictor(config));
  if (!dynamic_cast<TensorRTSubgraphPredictor*>(predictor.get())
           ->Init(nullptr)) {
    return nullptr;
  }
  return std::move(predictor);
}

}  // namespace paddle

@@ -0,0 +1,64 @@

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/contrib/inference/paddle_inference_api.h"

namespace paddle {

DEFINE_string(dirname, "", "Directory of the inference model.");

void Main(bool use_gpu) {
  //# 1. Create PaddlePredictor with a config.
  TensorRTConfig config;
  config.model_dir = FLAGS_dirname + "word2vec.inference.model";
  config.use_gpu = use_gpu;
  config.fraction_of_gpu_memory = 0.15;
  config.device = 0;
  auto predictor =
      CreatePaddlePredictor<TensorRTConfig,
                            PaddleEngineKind::kAutoMixedTensorRT>(config);

  for (int batch_id = 0; batch_id < 3; batch_id++) {
    //# 2. Prepare input.
    int64_t data[4] = {1, 2, 3, 4};

    PaddleTensor tensor{.name = "",
                        .shape = std::vector<int>({4, 1}),
                        .data = PaddleBuf(data, sizeof(data)),
                        .dtype = PaddleDType::INT64};

    // For simplicity, we set all the slots with the same data.
    std::vector<PaddleTensor> slots(4, tensor);

    //# 3. Run
    std::vector<PaddleTensor> outputs;
    CHECK(predictor->Run(slots, &outputs));

    //# 4. Get output.
    ASSERT_EQ(outputs.size(), 1UL);
    LOG(INFO) << "output buffer size: " << outputs.front().data.length();
    const size_t num_elements = outputs.front().data.length() / sizeof(float);
    // The outputs' buffers are in CPU memory.
    for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
      LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
    }
  }
}

TEST(paddle_inference_api_tensorrt_subgraph_engine, main) { Main(true); }

}  // namespace paddle