init Inference top APIs (#10549)
	
		
	
				
					
				
			
							parent
							
								
									13457ef306
								
							
						
					
					
						commit
						6d371e452e
					
				@ -0,0 +1,27 @@
 | 
				
			|||||||
 | 
					# Embed Paddle Inference in Your Application
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Paddle Inference offers APIs in the `C` and `C++` languages.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					One can easily deploy a model trained by Paddle by following the steps below:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					1. Optimize the native model;
 | 
				
			||||||
 | 
					2. Write some code for deployment.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Let's explain the steps in detail.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Optimize the native Fluid Model
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The native model obtained from the training phase needs to be optimized for inference:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					- Clean up noise such as the cost operators that are not needed for inference;
 | 
				
			||||||
 | 
					- Prune unnecessary computation branches that have nothing to do with the output;
 | 
				
			||||||
 | 
					- Remove extraneous variables;
 | 
				
			||||||
 | 
					- Reuse memory for the native Fluid executor;
 | 
				
			||||||
 | 
					- Translate the model storage format to that of a third-party engine, so that the inference API can utilize the engine for acceleration.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					We have an official tool to do the optimization; run `paddle_inference_optimize --help` for more information.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Write some code
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Read `paddle_inference_api.h` for more information.
 | 
				
			||||||
@ -0,0 +1,69 @@
 | 
				
			|||||||
 | 
					/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					   you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					   You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					   distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					   See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					   limitations under the License. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include <string>
 | 
				
			||||||
 | 
					#include <vector>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace paddle {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Top-level inference API: holds a model and runs predictions on it.
					class Predictor {
					public:
					  struct Attr;
					  Predictor() = default;

					  // Build the network before inference.
					  // Arguments:
					  //   attr: the configuration to build the network from (see Attr).
					  // NOTE(review): the bool result presumably reports success -- confirm
					  // against the implementation.
					  bool Init(const Attr& attr);

					  // Predict a record.
					  // Arguments:
					  //   inputs: the names of the input variables.
					  //   outputs: the names of the output variables.
					  //   input_shapes: the shapes of the input variables.
					  //   output_shapes: the shapes of the output variables.
					  //   input_data: the data of the input variables.
					  //   output_data: the data of the output variables (written by Run).
					  // NOTE(review): the bool result presumably reports success -- confirm
					  // against the implementation.
					  bool Run(const std::vector<std::string>& inputs,
					           const std::vector<std::string>& outputs,
					           const std::vector<std::vector<int>>& input_shapes,
					           const std::vector<std::vector<int>>& output_shapes,
					           const std::vector<std::vector<float>>& input_data,
					           std::vector<std::vector<float>>* output_data);

					  // Clone a predictor that shares the model weights.
					  // NOTE(review): a raw pointer is returned; ownership presumably passes
					  // to the caller -- confirm against the implementation.
					  Predictor* Clone();

					  // Destroy the Predictor.
					  ~Predictor();

					  // Configuration consumed by Init().
					  struct Attr {
					    // Defined ahead of the data members so that the default initializer
					    // of engine_kind refers to an enumerator that is already declared.
					    enum class EngineKind {
					      kNone = -1,          // Use the native Fluid facility.
					      kAnakin,             // Use Anakin for inference.
					      kTensorRT,           // Use TensorRT for inference.
					      kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
					      kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
					    };

					    std::string model_dir;      // Path to the model directory.
					    bool enable_engine{false};  // Enable to execute (part of) the model on
					                                // third-party engines.
					    EngineKind engine_kind{EngineKind::kNone};  // Engine to use.
					  };
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace paddle
 | 
				
			||||||
					Loading…
					
					
				
		Reference in new issue