init Inference top APIs (#10549)
parent 13457ef306
commit 6d371e452e

@@ -0,0 +1,27 @@
# Embed Paddle Inference in Your Application

Paddle inference offers APIs in the `C` and `C++` languages.

One can easily deploy a model trained by Paddle by following the steps below:
1. Optimize the native model;
2. Write some code for deployment.
Let's explain the steps in detail.
## Optimize the native Fluid Model

The native model produced by the training phase needs to be optimized for inference:
- Remove noise such as cost operators, which are not needed for inference;
- Prune unnecessary computation branches that have nothing to do with the output;
- Remove extraneous variables;
- Reuse memory in the native Fluid executor;
- Translate the model storage format to that of a third-party engine, so that the inference API can use the engine for acceleration.
We provide an official tool to do the optimization; run `paddle_inference_optimize --help` for more information.
## Write some code
Read `paddle_inference_api.h`, listed below, for more information; a usage sketch follows the listing.
@@ -0,0 +1,69 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <string>
#include <vector>

namespace paddle {

class Predictor {
 public:
  struct Attr;
  Predictor() = default;

  // Build the network before inference.
  bool Init(const Attr& attr);

  // Predict a record.
  // Arguments:
  //   inputs: the names of the input variables.
  //   outputs: the names of the output variables.
  //   input_shapes: the shapes of the input variables.
  //   output_shapes: the shapes of the output variables.
  //   input_data: the data of the input variables.
  //   output_data: the data of the output variables.
  bool Run(const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs,
           const std::vector<std::vector<int>>& input_shapes,
           const std::vector<std::vector<int>>& output_shapes,
           const std::vector<std::vector<float>>& input_data,
           std::vector<std::vector<float>>* output_data);

  // Clone a predictor that shares the model weights.
  Predictor* Clone();

  // Destroy the Predictor.
  ~Predictor();

  struct Attr {
    enum class EngineKind;

    std::string model_dir;      // path to the model directory.
    bool enable_engine{false};  // Enable executing (part of) the model on
                                // third-party engines.
    EngineKind engine_kind{Attr::EngineKind::kNone};

    enum class EngineKind {
      kNone = -1,          // Use the native Fluid facility.
      kAnakin,             // Use Anakin for inference.
      kTensorRT,           // Use TensorRT for inference.
      kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
      kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
    };
  };
};
} // namespace paddle
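
To show how the pieces above fit together, here is a minimal usage sketch written against this header. It is illustrative only: the model directory, the variable names `image` and `prob`, and the shapes are hypothetical placeholders that depend on the actual model.

#include <iostream>
#include <string>
#include <vector>

#include "paddle_inference_api.h"

int main() {
  // Configure the predictor. The model directory below is a hypothetical
  // path to a Fluid model that has already been optimized.
  paddle::Predictor::Attr attr;
  attr.model_dir = "./my_optimized_model";
  attr.enable_engine = false;  // Stay on the native Fluid facility; set
                               // engine_kind to pick a third-party engine.

  paddle::Predictor predictor;
  if (!predictor.Init(attr)) {
    std::cerr << "failed to build the inference network\n";
    return 1;
  }

  // Run one record through hypothetical variables "image" (input) and
  // "prob" (output); the names and shapes come from the model itself.
  std::vector<std::vector<float>> input_data = {{0.5f, 0.3f, 0.2f}};
  std::vector<std::vector<float>> output_data;
  if (!predictor.Run({"image"}, {"prob"}, {{1, 3}}, {{1, 3}}, input_data,
                     &output_data)) {
    std::cerr << "inference failed\n";
    return 1;
  }
  for (float v : output_data[0]) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}

Since `Clone()` shares the model weights, a deployment can initialize one `Predictor` per model and clone it for each worker thread instead of loading the model again.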