|
|
|
@ -13,22 +13,29 @@
|
|
|
|
|
// limitations under the License.
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
/*! \file paddle_api.h
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <cassert>
|
|
|
|
|
#include <memory>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
/*! \namespace paddle
|
|
|
|
|
*/
|
|
|
|
|
namespace paddle {
|
|
|
|
|
|
|
|
|
|
// Data type.
|
|
|
|
|
/** Paddle data type tag.
 * Unscoped enum with implicit values: FLOAT32 is 0 and INT64 is 1.
|
|
|
|
|
*/
|
|
|
|
|
enum PaddleDType {
|
|
|
|
|
FLOAT32,  // value 0; presumably 32-bit floating point, per the name
|
|
|
|
|
INT64,  // value 1; presumably 64-bit signed integer, per the name
|
|
|
|
|
// TODO(Superjomn) support more data types if needed.
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Memory manager for PaddleTensor.
|
|
|
|
|
/**
|
|
|
|
|
*\brief Memory manager for PaddleTensor.
|
|
|
|
|
*
|
|
|
|
|
*The PaddleBuf holds a buffer for data input or output. The memory can be
|
|
|
|
|
*allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
|
|
|
|
|
*should be reused for better performance.
|
|
|
|
@ -50,24 +57,32 @@ enum PaddleDType {
|
|
|
|
|
*/
|
|
|
|
|
class PaddleBuf {
|
|
|
|
|
public:
|
|
|
|
|
// PaddleBuf allocates memory internally and manages it.
|
|
|
|
|
/** Construct a PaddleBuf that allocates its memory internally and manages it.
 * Allocates `length` chars with `new char[length]`, records `length`, and
 * sets memory_owned_ to true.
 * \param length number of chars to allocate.
|
|
|
|
|
*/
|
|
|
|
|
explicit PaddleBuf(size_t length)
|
|
|
|
|
: data_(new char[length]), length_(length), memory_owned_(true) {}
|
|
|
|
|
// Set external memory, the PaddleBuf won't manage it.
|
|
|
|
|
/** Construct a PaddleBuf over external memory that the PaddleBuf won't manage.
 * Only stores the pointer and the length and sets memory_owned_ to false;
 * nothing is allocated or copied by this constructor.
 * \param data address of the externally provided memory.
 * \param length length recorded for that memory.
|
|
|
|
|
*/
|
|
|
|
|
PaddleBuf(void* data, size_t length)
|
|
|
|
|
: data_(data), length_(length), memory_owned_{false} {}
|
|
|
|
|
// Copy only available when memory is managed externally.
|
|
|
|
|
/** Copy only available when memory is managed externally.
|
|
|
|
|
*/
|
|
|
|
|
explicit PaddleBuf(const PaddleBuf&);
|
|
|
|
|
|
|
|
|
|
// Resize the memory.
|
|
|
|
|
/** Resize the memory.
|
|
|
|
|
*/
|
|
|
|
|
void Resize(size_t length);
|
|
|
|
|
// Reset to external memory, with address and length set.
|
|
|
|
|
/** Reset to external memory, with address and length set.
|
|
|
|
|
*/
|
|
|
|
|
void Reset(void* data, size_t length);
|
|
|
|
|
// Tell whether the buffer is empty.
|
|
|
|
|
/** Tell whether the buffer is empty.
 * \return true when the recorded length is zero (the data pointer is not
 * inspected).
|
|
|
|
|
*/
|
|
|
|
|
bool empty() const { return length_ == 0; }
|
|
|
|
|
// Get the memory address.
|
|
|
|
|
/** Get the memory address.
 * \return the stored data pointer, as a mutable void* even though the
 * accessor itself is const.
|
|
|
|
|
*/
|
|
|
|
|
void* data() const { return data_; }
|
|
|
|
|
// Get the memory length.
|
|
|
|
|
/** Get the memory length.
 * \return the stored length_ value.
|
|
|
|
|
*/
|
|
|
|
|
size_t length() const { return length_; }
|
|
|
|
|
|
|
|
|
|
/** Destructor: delegates all cleanup to Free(), which is defined outside this view. */
~PaddleBuf() { Free(); }
|
|
|
|
@ -83,7 +98,8 @@ class PaddleBuf {
|
|
|
|
|
bool memory_owned_{true};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Basic input and output data structure for PaddlePredictor.
|
|
|
|
|
/** Basic input and output data structure for PaddlePredictor.
|
|
|
|
|
*/
|
|
|
|
|
struct PaddleTensor {
|
|
|
|
|
PaddleTensor() = default;
|
|
|
|
|
std::string name; // variable name.
|
|
|
|
@ -94,19 +110,22 @@ struct PaddleTensor {
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Place tag with values kUNK = -1, kCPU = 0, kGPU = 1.
 * NOTE(review): presumably unknown / CPU / GPU device, per the names; confirm.
 */
enum class PaddlePlace { kUNK = -1, kCPU, kGPU };
|
|
|
|
|
// Tensor without copy, currently only supports AnalysisPredictor.
|
|
|
|
|
/** Tensor without copy, currently only supports AnalysisPredictor.
|
|
|
|
|
*/
|
|
|
|
|
class ZeroCopyTensor {
|
|
|
|
|
public:
|
|
|
|
|
void Reshape(const std::vector<int>& shape);
|
|
|
|
|
|
|
|
|
|
// Get the memory in CPU or GPU with specific data type, should Reshape first
|
|
|
|
|
// to tell the data size.
|
|
|
|
|
// One can directly call this to feed the data.
|
|
|
|
|
// This is for writing the input tensor.
|
|
|
|
|
/** Get the memory in CPU or GPU with specific data type, should Reshape first
|
|
|
|
|
* to tell the data size.
|
|
|
|
|
* One can directly call this to feed the data.
|
|
|
|
|
* This is for writing the input tensor.
|
|
|
|
|
*/
|
|
|
|
|
template <typename T>
|
|
|
|
|
T* mutable_data(PaddlePlace place);
|
|
|
|
|
// Get the memory directly, will return the place and memory size by pointer.
|
|
|
|
|
// This is for reading the output tensor.
|
|
|
|
|
/** Get the memory directly, will return the place and memory size by pointer.
|
|
|
|
|
* This is for reading the output tensor.
|
|
|
|
|
*/
|
|
|
|
|
template <typename T>
|
|
|
|
|
T* data(PaddlePlace* place, int* size) const;
|
|
|
|
|
|
|
|
|
@ -128,8 +147,7 @@ class ZeroCopyTensor {
|
|
|
|
|
void* scope_{nullptr};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* A simple Inference API for Paddle.
|
|
|
|
|
/** A simple Inference API for Paddle.
|
|
|
|
|
*/
|
|
|
|
|
class PaddlePredictor {
|
|
|
|
|
public:
|
|
|
|
@ -138,18 +156,20 @@ class PaddlePredictor {
|
|
|
|
|
PaddlePredictor(const PaddlePredictor&) = delete;
|
|
|
|
|
PaddlePredictor& operator=(const PaddlePredictor&) = delete;
|
|
|
|
|
|
|
|
|
|
// Predict a record.
|
|
|
|
|
// The caller should be responsible for allocating and releasing the memory of
|
|
|
|
|
// `inputs`. `inputs` should be available until Run returns. Caller should be
|
|
|
|
|
// responsible for the output tensor's buffer, either allocated or passed from
|
|
|
|
|
// outside.
|
|
|
|
|
/** Predict a record.
|
|
|
|
|
* The caller should be responsible for allocating and releasing the memory of
|
|
|
|
|
* `inputs`. `inputs` should be available until Run returns. Caller should be
|
|
|
|
|
* responsible for the output tensor's buffer, either allocated or passed from
|
|
|
|
|
* outside.
|
|
|
|
|
*/
|
|
|
|
|
virtual bool Run(const std::vector<PaddleTensor>& inputs,
|
|
|
|
|
std::vector<PaddleTensor>* output_data,
|
|
|
|
|
int batch_size = -1) = 0;
|
|
|
|
|
|
|
|
|
|
// Zero copy input and output optimization.
|
|
|
|
|
// Get the input or output tensors, and operate on their memory directly,
|
|
|
|
|
// without copy.
|
|
|
|
|
/** Zero copy input and output optimization.
|
|
|
|
|
* Get the input or output tensors, and operate on their memory directly,
|
|
|
|
|
* without copy.
|
|
|
|
|
*/
|
|
|
|
|
virtual std::unique_ptr<ZeroCopyTensor> GetInputTensor(
|
|
|
|
|
const std::string& name) {
|
|
|
|
|
return nullptr;
|
|
|
|
@ -160,16 +180,19 @@ class PaddlePredictor {
|
|
|
|
|
}
|
|
|
|
|
/** Default implementation performs no work and returns false.
 * NOTE(review): presumably overridden by predictors that support zero-copy
 * execution; confirm against the derived classes.
 */
virtual bool ZeroCopyRun() { return false; }
|
|
|
|
|
|
|
|
|
|
// Clone a predictor that shares the model weights; the cloned predictor should
|
|
|
|
|
// be thread-safe.
|
|
|
|
|
/** Clone a predictor that shares the model weights; the cloned predictor
|
|
|
|
|
* should be thread-safe.
|
|
|
|
|
*/
|
|
|
|
|
virtual std::unique_ptr<PaddlePredictor> Clone() = 0;
|
|
|
|
|
|
|
|
|
|
// Destroy the Predictor.
|
|
|
|
|
/** Destroy the Predictor.
|
|
|
|
|
*/
|
|
|
|
|
virtual ~PaddlePredictor() = default;
|
|
|
|
|
|
|
|
|
|
// The common configs for all the predictors.
|
|
|
|
|
/** The common configs for all the predictors.
|
|
|
|
|
*/
|
|
|
|
|
struct Config {
|
|
|
|
|
std::string model_dir; // path to the model directory.
|
|
|
|
|
std::string model_dir; /*!< path to the model directory. */
|
|
|
|
|
};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
@ -177,17 +200,21 @@ struct NativeConfig : public PaddlePredictor::Config {
|
|
|
|
|
// GPU related fields.
|
|
|
|
|
bool use_gpu{false};
|
|
|
|
|
int device{0};
|
|
|
|
|
float fraction_of_gpu_memory{-1.f}; // Change to a float in (0,1] if needed.
|
|
|
|
|
float fraction_of_gpu_memory{
|
|
|
|
|
-1.f}; /*!< Change to a float in (0,1] if needed. */
|
|
|
|
|
|
|
|
|
|
// Specify the exact path of program and parameter files.
|
|
|
|
|
std::string prog_file;
|
|
|
|
|
std::string param_file;
|
|
|
|
|
|
|
|
|
|
// Specify the variable's name of each input if input tensors don't follow the
|
|
|
|
|
// `feeds` and `fetches` of the phase `save_inference_model`.
|
|
|
|
|
/** Specify the variable's name of each input if input tensors don't follow
|
|
|
|
|
* the
|
|
|
|
|
* `feeds` and `fetches` of the phase `save_inference_model`.
|
|
|
|
|
*/
|
|
|
|
|
bool specify_input_name{false};
|
|
|
|
|
|
|
|
|
|
// Set and get the number of cpu math library threads.
|
|
|
|
|
/** Set the number of cpu math library threads.
 * Stores the argument in cpu_math_library_num_threads_ as-is; no range
 * validation is performed here.
 * \param cpu_math_library_num_threads the thread count to record.
|
|
|
|
|
*/
|
|
|
|
|
void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads) {
|
|
|
|
|
cpu_math_library_num_threads_ = cpu_math_library_num_threads;
|
|
|
|
|
}
|
|
|
|
@ -201,28 +228,33 @@ struct NativeConfig : public PaddlePredictor::Config {
|
|
|
|
|
int cpu_math_library_num_threads_{1};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// A factory to help create different predictors.
|
|
|
|
|
//
|
|
|
|
|
// Usage:
|
|
|
|
|
//
|
|
|
|
|
// NativeConfig config;
|
|
|
|
|
// ... // change the configs.
|
|
|
|
|
// auto native_predictor = CreatePaddlePredictor(config);
|
|
|
|
|
//
|
|
|
|
|
// FOR EXTENSION DEVELOPER:
|
|
|
|
|
// Different predictors are designated by config type. Similar configs can be
|
|
|
|
|
// merged, but there shouldn't be a huge config containing different fields for
|
|
|
|
|
// more than one kind of predictors.
|
|
|
|
|
/*! \fn std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT&
|
|
|
|
|
* config);
|
|
|
|
|
*
|
|
|
|
|
* \brief A factory to help create different predictors.
|
|
|
|
|
*
|
|
|
|
|
* Usage:
|
|
|
|
|
*
|
|
|
|
|
* NativeConfig config;
|
|
|
|
|
* ... // change the configs.
|
|
|
|
|
* auto native_predictor = CreatePaddlePredictor(config);
|
|
|
|
|
*
|
|
|
|
|
* FOR EXTENSION DEVELOPER:
|
|
|
|
|
* Different predictors are designated by config type. Similar configs can be
|
|
|
|
|
* merged, but there shouldn't be a huge config containing different fields for
|
|
|
|
|
* more than one kind of predictors.
|
|
|
|
|
*/
|
|
|
|
|
template <typename ConfigT>
|
|
|
|
|
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
|
|
|
|
|
|
|
|
|
|
// NOTE The following APIs are too trivial; we will discard them in the following
|
|
|
|
|
// versions.
|
|
|
|
|
/** NOTE The following APIs are too trivial; we will discard them in the following
|
|
|
|
|
* versions.
|
|
|
|
|
*/
|
|
|
|
|
enum class PaddleEngineKind {
|
|
|
|
|
kNative = 0, // Use the native Fluid facility.
|
|
|
|
|
kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
|
|
|
|
|
kAnalysis, // More optimization.
|
|
|
|
|
kAnakin // Use Anakin for inference, not mature yet.
|
|
|
|
|
kNative = 0, /*!< Use the native Fluid facility. */
|
|
|
|
|
kAutoMixedTensorRT, /*!< Automatically mix Fluid with TensorRT. */
|
|
|
|
|
kAnalysis, /*!< More optimization. */
|
|
|
|
|
kAnakin /*!< Use Anakin for inference, not mature yet. */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
template <typename ConfigT, PaddleEngineKind engine>
|
|
|
|
|