limitations under the License. */
namespace paddle {

// Data type.
enum PaddleDType {
  FLOAT32,
  INT64,
  // TODO(Superjomn) support more data types if needed.
};

/*
 * Memory management for PaddleTensor.
 * The PaddleBuf holds a buffer for data input or output. The memory can be
 * allocated by the user or by PaddleBuf itself, but in any case, the PaddleBuf
 * should be reused for better performance.
 *
 * For user-allocated memory, the following API can be used:
 * - PaddleBuf(void* data, size_t length) to set an external memory by
 *   specifying the memory address and length.
 * - Reset(void* data, size_t length) to reset the PaddleBuf with an external
 *   memory.
 * ATTENTION: for user-allocated memory, deallocation should be done by the
 * user externally after the program finishes. The PaddleBuf won't do any
 * allocation or deallocation.
 *
 * To have the PaddleBuf allocate and manage the memory:
 * - PaddleBuf(size_t length) will allocate a memory of size `length`.
 * - Resize(size_t length) resizes the memory to no less than `length`.
 *   ATTENTION: if the allocated memory is already larger than `length`,
 *   nothing will be done.
 */
class PaddleBuf {
 public:
  // PaddleBuf allocates memory internally, and manages it.
  explicit PaddleBuf(size_t length)
      : data_(new char[length]), length_(length), memory_owned_(true) {}
  // Set external memory; the PaddleBuf won't manage it.
  PaddleBuf(void* data, size_t length)
      : data_(data), length_(length), memory_owned_{false} {}
  // Copy only available when memory is managed externally.
  explicit PaddleBuf(const PaddleBuf&);

  // Resize the memory.
  void Resize(size_t length);
  // Reset to external memory, with address and length set.
  void Reset(void* data, size_t length);
  // Tell whether the buffer is empty.
  bool empty() const { return length_ == 0; }
  // Get the memory address.
  void* data() const { return data_; }
  // Get the memory length.
  size_t length() const { return length_; }

  ~PaddleBuf() { Free(); }
  PaddleBuf& operator=(const PaddleBuf&);
  PaddleBuf& operator=(PaddleBuf&&);
  PaddleBuf() = default;
  PaddleBuf(PaddleBuf&& other);

 private:
  void Free();
  void* data_{nullptr};  // pointer to the data memory.
  size_t length_{0};     // number of memory bytes.
  bool memory_owned_{true};
};
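
// A usage sketch for PaddleBuf (illustrative only, not part of the API; the
// input values are made up):
//
//   float input[4] = {1.f, 2.f, 3.f, 4.f};
//   PaddleBuf external(input, sizeof(input));  // user-managed memory.
//   PaddleBuf owned(sizeof(input));            // PaddleBuf-managed memory.
//   std::memcpy(owned.data(), input, owned.length());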

// Basic input and output data structure for PaddlePredictor.
struct PaddleTensor {
  PaddleTensor() = default;
  std::string name;  // variable name.
  std::vector<int> shape;
  PaddleBuf data;  // blob of data.
  PaddleDType dtype;
  std::vector<std::vector<size_t>> lod;  // Tensor+LoD equals LoDTensor
};
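
// A sketch of filling a PaddleTensor (illustrative only; the name, shape and
// buffer are made up):
//
//   PaddleTensor t;
//   t.name = "x";
//   t.shape = {1, 4};
//   t.data = PaddleBuf(input, sizeof(input));  // `input` as in the sketch above.
//   t.dtype = FLOAT32;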

/*
 * A simple Inference API for Paddle.
 */
class PaddlePredictor {
 public:
  // Predict a record.
  // The caller should be responsible for allocating and releasing the memory
  // of `inputs`; `inputs` should be available until Run returns. The caller is
  // also responsible for the output tensors' buffers, either allocated or
  // passed from outside.
  virtual bool Run(const std::vector<PaddleTensor>& inputs,
                   std::vector<PaddleTensor>* output_data,
                   int batch_size = -1) = 0;

  // Clone a predictor that shares the model weights; the cloned predictor
  // should be thread-safe.
  virtual std::unique_ptr<PaddlePredictor> Clone() = 0;

  // Destroy the Predictor.
  virtual ~PaddlePredictor() = default;

  // The common configs for all the predictors.
  struct Config {
    std::string model_dir;  // path to the model directory.
  };
};

struct NativeConfig : public PaddlePredictor::Config {
  // GPU related fields.
  bool use_gpu{false};
  int device{0};
  float fraction_of_gpu_memory{-1.f};  // Change to a float in (0,1] if needed.

  // Specify the exact path of the program and parameter files.
  std::string prog_file;
  std::string param_file;

  // Specify the variable name of each input if the input tensors don't follow
  // the `feeds` and `fetches` of the `save_inference_model` phase.
  bool specify_input_name{false};
};
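
// A configuration sketch (illustrative only; the paths are placeholders):
//
//   NativeConfig config;
//   config.model_dir = "./mobilenet";  // or set prog_file/param_file instead.
//   config.use_gpu = true;
//   config.device = 0;
//   config.fraction_of_gpu_memory = 0.15f;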

// A factory to help create different predictors.
//
// Usage:
//
// NativeConfig config;
// ... // change the configs.
// auto native_predictor = CreatePaddlePredictor(config);
//
// FOR EXTENSION DEVELOPERS:
// Different predictors are designated by config type. Similar configs can be
// merged, but there shouldn't be a huge config containing different fields for
// more than one kind of predictor.
template <typename ConfigT>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
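
// An end-to-end sketch (illustrative only; error handling omitted, `config`
// and `t` as in the sketches above):
//
//   auto predictor = CreatePaddlePredictor(config);
//   std::vector<PaddleTensor> inputs;
//   inputs.emplace_back(std::move(t));
//   std::vector<PaddleTensor> outputs;
//   if (predictor->Run(inputs, &outputs)) {
//     auto* result = static_cast<const float*>(outputs[0].data.data());
//   }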

// NOTE: The following APIs are too trivial and will be discarded in later
// versions.
enum class PaddleEngineKind {
  kNative = 0,         // Use the native Fluid facility.
  kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
  kAnalysis,           // More optimization.
  kAnakin              // Use Anakin for inference, not mature yet.
};

// Similarly, each engine kind should map to a unique predictor implementation.
template <typename ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
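
// Selecting a predictor by engine kind (illustrative only; `config` as above):
//
//   auto analysis_predictor =
//       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kAnalysis>(config);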

// -----------------------------------------------------------------------------------
// NOTE: The following APIs are not mature yet, we are still working on them.

namespace contrib {

// Accelerate GPU computation with the TensorRT engine.
struct MixedRTConfig : public NativeConfig {
  // Determine whether a subgraph will be executed by TRT.
  int min_subgraph_size{1};
  // While TensorRT allows an engine optimized for a given max batch size
  // to run at any smaller size, the performance for those smaller sizes may
  // not be as well-optimized. Therefore, Nvidia suggests creating multiple
  // engines optimized for different batch sizes. This sets the max batch size
  // that the engine will be optimized for.
  int max_batch_size{1};
};
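
// A TensorRT mixed-engine sketch (illustrative only; values are placeholders):
//
//   contrib::MixedRTConfig trt_config;
//   trt_config.model_dir = "./mobilenet";
//   trt_config.use_gpu = true;
//   trt_config.max_batch_size = 8;  // optimize the engine for batches of <= 8.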

// NOTE: WIP, not stable yet.
struct AnalysisConfig : public NativeConfig {
  enum class IrPassMode {
    kSystem,   // Use system default passes, do not customize.
    kInclude,  // Specify the passes in `ir_passes`.
    kExclude   // Specify the disabled passes in `ir_passes`.
  };

  bool enable_ir_optim{true};
  IrPassMode ir_mode{IrPassMode::kExclude};
  // The attention LSTM fuse pass works only on some specific models; disabled
  // by default.
  std::vector<std::string> ir_passes{"attention_lstm_fuse_pass"};

  // NOTE: this is just for internal development; please do not use it.
  bool _use_mkldnn{false};
};
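
// An analysis-engine sketch (illustrative only; the pass list is an example):
//
//   contrib::AnalysisConfig analysis_config;
//   analysis_config.model_dir = "./mobilenet";
//   analysis_config.ir_mode = contrib::AnalysisConfig::IrPassMode::kExclude;
//   analysis_config.ir_passes = {"attention_lstm_fuse_pass"};  // disabled passes.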

// Configurations for the Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
  enum TargetType { NVGPU = 0, X86 };
  int device;
  std::string model_file;
  int max_batch_size{-1};
  TargetType target_type;
};
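
// An Anakin sketch (illustrative only; the model path is a placeholder):
//
//   contrib::AnakinConfig anakin_config;
//   anakin_config.model_file = "./model.anakin.bin";
//   anakin_config.target_type = contrib::AnakinConfig::NVGPU;
//   auto anakin_predictor =
//       CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
//           anakin_config);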

}  // namespace contrib

int PaddleDtypeSize(PaddleDType dtype);
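// For example, PaddleDtypeSize(FLOAT32) would return sizeof(float) (4 bytes on
// common platforms) and PaddleDtypeSize(INT64) would return 8.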