@@ -25,7 +25,6 @@ limitations under the License. */
 #include <memory>
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/macros.h"
 
 namespace paddle {
 
@@ -34,7 +33,7 @@ enum PaddleDType {
   INT64,
 };
 
-class PADDLE_DLL PaddleBuf {
+class PaddleBuf {
  public:
   PaddleBuf() = default;
   PaddleBuf(PaddleBuf&& other);
@@ -46,7 +45,7 @@ class PADDLE_DLL PaddleBuf {
   PaddleBuf(void* data, size_t length)
       : data_(data), length_(length), memory_owned_{false} {}
   // Own memory.
-  explicit PaddleBuf(size_t length)
+  PaddleBuf(size_t length)
       : data_(new char[length]), length_(length), memory_owned_(true) {}
   // Resize to `length` bytes.
   void Resize(size_t length);
@@ -65,7 +64,7 @@ class PADDLE_DLL PaddleBuf {
   bool memory_owned_{true};
 };
 
-struct PADDLE_DLL PaddleTensor {
+struct PaddleTensor {
   PaddleTensor() = default;
   std::string name;  // variable name.
   std::vector<int> shape;
@@ -88,7 +87,7 @@ enum class PaddleEngineKind {
  * A simple Inference API for Paddle. Currently this API can be used by
  * non-sequence scenerios.
  */
-class PADDLE_DLL PaddlePredictor {
+class PaddlePredictor {
  public:
   struct Config;
   PaddlePredictor() = default;
@@ -97,6 +96,7 @@ class PADDLE_DLL PaddlePredictor {
 
   // Predict an record.
   // The caller should be responsible for allocating and releasing the memory of
+  // `inputs`. `inputs` should be available until Run returns. Caller should be
   // responsible for the output tensor's buffer, either allocated or passed from
   // outside.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
@@ -111,12 +111,12 @@ class PADDLE_DLL PaddlePredictor {
   virtual ~PaddlePredictor() = default;
 
   // The common configs for all the predictors.
-  struct PADDLE_DLL Config {
+  struct Config {
     std::string model_dir;  // path to the model directory.
   };
 };
 
-struct PADDLE_DLL NativeConfig : public PaddlePredictor::Config {
+struct NativeConfig : public PaddlePredictor::Config {
   // GPU related fields.
   bool use_gpu{false};
   int device{0};
@@ -129,7 +129,7 @@ struct PADDLE_DLL NativeConfig : public PaddlePredictor::Config {
 };
 
 // Configurations for Anakin engine.
-struct PADDLE_DLL AnakinConfig : public PaddlePredictor::Config {
+struct AnakinConfig : public PaddlePredictor::Config {
   enum TargetType { NVGPU = 0, X86 };
   int device;
   std::string model_file;
@@ -137,7 +137,7 @@ struct PADDLE_DLL AnakinConfig : public PaddlePredictor::Config {
   TargetType target_type;
 };
 
-struct PADDLE_DLL TensorRTConfig : public NativeConfig {
+struct TensorRTConfig : public NativeConfig {
   // Determine whether a subgraph will be executed by TRT.
   int min_subgraph_size{1};
   // While TensorRT allows an engine optimized for a given max batch size
@@ -159,9 +159,8 @@ struct PADDLE_DLL TensorRTConfig : public NativeConfig {
 //
 // Similarly, each engine kind should map to a unique predictor implementation.
 template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
-PADDLE_DLL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
-    const ConfigT& config);
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
 
-PADDLE_DLL int PaddleDtypeSize(PaddleDType dtype);
+int PaddleDtypeSize(PaddleDType dtype);
 
 }  // namespace paddle
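
For context (not part of the diff): every hunk here strips the PADDLE_DLL export macro, plus the macros.h include that provided it, leaving the plain declarations on each hunk's `+` side. Below is a minimal usage sketch of the resulting API. It assumes pieces that lie outside the hunks shown (PaddleTensor's data/dtype members, PaddleDType::FLOAT32, PaddleBuf's assignment operator, and Run's output and default batch-size parameters) match the rest of paddle_inference_api.h at this revision; the model path "./my_model" and input name "x" are hypothetical.

#include <iostream>
#include <vector>

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  // Configure a native (CPU) predictor. model_dir comes from the shared
  // PaddlePredictor::Config base; use_gpu/device come from NativeConfig.
  paddle::NativeConfig config;
  config.model_dir = "./my_model";  // hypothetical model path
  config.use_gpu = false;
  config.device = 0;

  // The default engine kind is PaddleEngineKind::kNative.
  auto predictor = paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);

  // Build one input tensor. PaddleBuf(void*, size_t) does NOT take ownership
  // (memory_owned_ is false), so input_values must stay alive until Run()
  // returns -- exactly the contract the comment in hunk @ -97,6 spells out.
  std::vector<float> input_values(4, 1.0f);
  std::vector<paddle::PaddleTensor> inputs(1), outputs;
  inputs[0].name = "x";  // hypothetical input variable name
  inputs[0].shape = {1, 4};
  inputs[0].data = paddle::PaddleBuf(input_values.data(),
                                     input_values.size() * sizeof(float));
  inputs[0].dtype = paddle::PaddleDType::FLOAT32;

  // The predictor is responsible for the buffers it hands back in `outputs`.
  if (!predictor->Run(inputs, &outputs)) {
    std::cerr << "inference failed" << std::endl;
    return 1;
  }
  std::cout << "got " << outputs.size() << " output tensor(s)" << std::endl;
  return 0;
}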