Golang inference API (#22503)

* support  golang inference
revert-22710-feature/integrated_ps_api
flame 5 years ago committed by GitHub
parent ec90742708
commit 1d503e6a9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -176,7 +176,7 @@ else(WIN32)
endif(WIN32)
copy(inference_lib_dist
SRCS ${src_dir}/inference/capi/c_api.h ${paddle_fluid_c_lib}
SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_fluid_c_lib}
DSTS ${FLUID_INFERENCE_C_INSTALL_DIR}/paddle/include ${FLUID_INFERENCE_C_INSTALL_DIR}/paddle/lib)
# fluid library for both train and inference

@ -0,0 +1,54 @@
# Paddle 预测golang API
## 安装
首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到`fluid_inference_c_install_dir`,将该目录移动到当前目录中并重命名为`paddle_c`
## 在Go中使用Paddle预测
首先创建预测配置
``` go
config := paddle.NewAnalysisConfig()
config.SetModel(model_file, params_file)
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
```
创建predictor
``` go
predictor := paddle.NewPredictor(config)
```
获取输入Tensor和输出Tensor
``` go
inputs := predictor.GetInputTensors()
outputs := predictor.GetOutputTensors()
```
设置输入数据(假设只有一个输入)
``` go
input := inputs[0]
input.SetValue(data)
input.Reshape([]int32{1, 3, 300, 300})
```
运行预测
``` go
predictor.ZeroCopyRun()
```
获取输出Tensor的真实值
``` go
output := outputs[0]
predictor.GetZeroCopyOutput(output)
value := reflect.ValueOf(output.Value())
shape, dtype := paddle.ShapeAndTypeOf(value)
output_data := value.Interface().([][]float32)
```
## 示例
源码见[mobilenet](./demo/mobilenet.go)
下载[数据](https://paddle-inference-dist.cdn.bcebos.com/mobilenet-test-model-data.tar.gz)并解压到当前目录
运行
``` go
go run ./demo/mobilenet.go
```

@ -0,0 +1,81 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import "../paddle"
import "strings"
import "io/ioutil"
import "strconv"
import "reflect"
func main() {
config := paddle.NewAnalysisConfig()
config.SetModel("data/model/__model__", "data/model/__params__")
config.DisableGlogInfo()
config.SwitchUseFeedFetchOps(false)
config.SwitchSpecifyInputNames(true)
predictor := paddle.NewPredictor(config)
println("============== paddle inference ==============")
println("input num: ", predictor.GetInputNum())
println("input name: ", predictor.GetInputNames()[0])
println("output num: ", predictor.GetOutputNum())
println("output name: ", predictor.GetInputNames()[0])
println("============== run inference =================")
input := predictor.GetInputTensors()[0]
output := predictor.GetOutputTensors()[0]
filename := "data/data.txt"
data := ReadData(filename)
input.SetValue(data[:1 * 3 * 300 * 300])
input.Reshape([]int32{1, 3, 300, 300})
predictor.SetZeroCopyInput(input)
predictor.ZeroCopyRun()
predictor.GetZeroCopyOutput(output)
println("============= parse output ===================")
output_val := output.Value()
value := reflect.ValueOf(output_val)
shape, dtype := paddle.ShapeAndTypeOf(value)
switch dtype {
case paddle.PaddleDType(paddle.FLOAT32):
v := value.Interface().([][]float32)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.UINT8):
v := value.Interface().([][]uint8)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.INT32):
v := value.Interface().([][]int32)
println("v: ", v[0][0], v[0][1], "...")
case paddle.PaddleDType(paddle.INT64):
v := value.Interface().([][]int64)
println("v: ", v[0][0], v[0][1], "...")
}
println(shape[0], shape[1])
println(output.Shape()[0])
}
// ReadData parses whitespace-separated float32 values from filename.
// strings.Fields (instead of splitting on a single space) also handles
// newlines and repeated blanks, which the original silently turned into
// bogus 0.0 entries. Unparsable tokens are skipped; a read error yields nil.
func ReadData(filename string) []float32 {
	file_bytes, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil
	}
	data_slice := strings.Fields(string(file_bytes))
	result := make([]float32, 0, len(data_slice))
	for _, tok := range data_slice {
		r, err := strconv.ParseFloat(tok, 32)
		if err != nil {
			continue
		}
		result = append(result, float32(r))
	}
	return result
}

@ -0,0 +1,72 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_c_api.h>
#include <stdio.h>
#include <stdlib.h>
void SetConfig(PD_AnalysisConfig *);
void ReadData(float *data, int size);
// Run one zero-copy inference pass through the C API: build config and
// predictor, fill a 1x3x300x300 float input from data/data.txt, run, and
// fetch the first output.
int main(int argc, char *argv[]) {
  PD_AnalysisConfig *config = PD_NewAnalysisConfig();
  SetConfig(config);
  PD_Predictor *predictor = PD_NewPredictor(config);

  int input_num = PD_GetInputNum(predictor);
  printf("Input num: %d\n", input_num);
  int output_num = PD_GetOutputNum(predictor);
  printf("Output num: %d\n", output_num);

  PD_ZeroCopyTensor input;
  // Fix: zero the tensor first; the original left `lod` (and the other
  // fields) uninitialized, so PD_SetZeroCopyInput read garbage.
  PD_InitZeroCopyTensor(&input);
  input.name = const_cast<char *>(PD_GetInputName(predictor, 0));  // NOLINT
  input.data.capacity = sizeof(float) * 1 * 3 * 300 * 300;
  input.data.length = input.data.capacity;
  input.data.data = malloc(input.data.capacity);
  if (input.data.data == NULL) {
    fprintf(stderr, "failed to allocate input buffer\n");
    return 1;
  }
  // The shape buffer points at this stack array, so do NOT destroy `input`
  // with PD_DestroyZeroCopyTensor; free only the heap data buffer below.
  int shape[] = {1, 3, 300, 300};
  input.shape.data = static_cast<int *>(shape);
  input.shape.capacity = sizeof(shape);
  input.shape.length = sizeof(shape);
  input.dtype = PD_FLOAT32;
  ReadData((float *)input.data.data, 1 * 3 * 300 * 300);  // NOLINT

  PD_SetZeroCopyInput(predictor, &input);
  PD_ZeroCopyRun(predictor);

  PD_ZeroCopyTensor output;
  PD_InitZeroCopyTensor(&output);
  output.name = const_cast<char *>(PD_GetOutputName(predictor, 0));  // NOLINT
  PD_GetZeroCopyOutput(predictor, &output);

  // Fix: release the input buffer the original leaked.
  free(input.data.data);
  PD_DestroyZeroCopyTensor(&output);
  PD_DeleteAnalysisConfig(config);
  PD_DeletePredictor(predictor);
  return 0;
}
// SetConfig prepares the analysis config for zero-copy inference:
// combined model/params files, feed-fetch ops disabled (required by the
// zero-copy path), named inputs enabled, and glog output silenced.
void SetConfig(PD_AnalysisConfig *config) {
  PD_SetModel(config, "data/model/__model__", "data/model/__params__");
  PD_SwitchUseFeedFetchOps(config, false);
  PD_SwitchSpecifyInputNames(config, true);
  PD_DisableGlogInfo(config);
  // PD_SwitchIrOptim(config, false);
}
// ReadData fills data[0..n) with floats parsed from data/data.txt.
// Exits if the file cannot be opened (the original dereferenced a NULL
// FILE*); entries past a short file are zeroed instead of left as garbage.
void ReadData(float *data, int n) {
  FILE *fp = fopen("data/data.txt", "r");
  if (fp == NULL) {
    fprintf(stderr, "failed to open data/data.txt\n");
    exit(EXIT_FAILURE);
  }
  for (int i = 0; i < n; i++) {
    if (fscanf(fp, "%f", &data[i]) != 1) {
      data[i] = 0.0f;
    }
  }
  fclose(fp);
}

@ -0,0 +1,47 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <paddle_inference_api.h>
#include <fstream>
#include <iostream>
void SetConfig(paddle::AnalysisConfig *);
// Run one inference pass through the C++ API: configure, build a predictor,
// feed a 1x3x300x300 float tensor read from data/data.txt, and fetch the
// first output tensor.
int main(int argc, char *argv[]) {
  paddle::AnalysisConfig config;
  SetConfig(&config);
  auto predictor = paddle::CreatePaddlePredictor(config);

  auto input_name = predictor->GetInputNames()[0];
  auto input = predictor->GetInputTensor(input_name);
  std::cout << predictor->GetOutputNames()[0] << std::endl;

  std::vector<int> shape{1, 3, 300, 300};
  input->Reshape(std::move(shape));

  std::vector<float> data(1 * 300 * 300 * 3);
  std::ifstream fin("data/data.txt");
  if (!fin) {
    std::cerr << "failed to open data/data.txt" << std::endl;
    return 1;
  }
  // size_t index avoids the signed/unsigned comparison of the original.
  for (size_t i = 0; i < data.size(); i++) {
    fin >> data[i];
  }
  input->copy_from_cpu(data.data());

  predictor->ZeroCopyRun();

  auto output_name = predictor->GetOutputNames()[0];
  // Fix: the original assigned to an undeclared `output` (compile error).
  auto output = predictor->GetOutputTensor(output_name);
  return 0;
}
// SetConfig prepares the analysis config: combined model/params files,
// named inputs, and IR optimization off.
// NOTE(review): feed/fetch ops are left ON here although main() uses
// ZeroCopyRun; the C demo disables them for the zero-copy path — confirm
// this is intentional.
void SetConfig(paddle::AnalysisConfig *config) {
  config->SetModel("data/model/__model__", "data/model/__params__");
  config->SwitchUseFeedFetchOps(true);
  config->SwitchSpecifyInputNames(true);
  config->SwitchIrOptim(false);
}

@ -0,0 +1,42 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -Ipaddle_c/paddle/include
// #cgo LDFLAGS: -Lpaddle_c/paddle/lib -lpaddle_fluid_c
// #include <stdbool.h>
// #include <paddle_c_api.h>
import "C"
import "fmt"
// ConvertCBooleanToGo maps a C bool to a Go bool: anything other than the
// zero value of C.bool is treated as true.
func ConvertCBooleanToGo(b C.bool) bool {
	var zero C.bool
	return b != zero
}
// numel returns the total element count implied by shape: the product of
// all dimensions, or 1 for an empty shape.
func numel(shape []int32) int32 {
	total := int32(1)
	for i := range shape {
		total *= shape[i]
	}
	return total
}
// bug builds an error tagged as an internal inconsistency ("Bug ...").
func bug(format string, args ...interface{}) error {
	msg := fmt.Sprintf(format, args...)
	return fmt.Errorf("Bug %v", msg)
}

@ -0,0 +1,189 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -Ipaddle_c/paddle/include
// #cgo LDFLAGS: -Lpaddle_c/paddle/lib -lpaddle_fluid_c
// #include <stdbool.h>
// #include <stdlib.h>
// #include <paddle_c_api.h>
import "C"
import "runtime"
import "unsafe"
// AnalysisConfig wraps the C-side PD_AnalysisConfig and carries all
// inference options (model paths, device selection, optimization switches)
// used to build a Predictor.
type AnalysisConfig struct {
	c *C.PD_AnalysisConfig
}

// NewAnalysisConfig allocates a config on the C side; a finalizer releases
// the C object when the Go wrapper is garbage collected.
func NewAnalysisConfig() *AnalysisConfig {
	c_config := C.PD_NewAnalysisConfig()
	config := &AnalysisConfig{c: c_config}
	runtime.SetFinalizer(config, (*AnalysisConfig).finalize)
	return config
}

// finalize frees the underlying C config object.
func (config *AnalysisConfig) finalize() {
	C.PD_DeleteAnalysisConfig(config.c)
}

// SetModel sets the combined model file and parameter file paths.
// The C strings are freed once the call returns (PD_SetModel copies them).
func (config *AnalysisConfig) SetModel(model, params string) {
	//C.printString((*C.char)(unsafe.Pointer(&s[0])))
	c_model := C.CString(model)
	defer C.free(unsafe.Pointer(c_model))
	c_params := C.CString(params)
	defer C.free(unsafe.Pointer(c_params))
	C.PD_SetModel(config.c, c_model, c_params)
}

// ModelDir returns the configured model directory.
func (config *AnalysisConfig) ModelDir() string {
	return C.GoString(C.PD_ModelDir(config.c))
}

// ProgFile returns the configured program file path.
func (config *AnalysisConfig) ProgFile() string {
	return C.GoString(C.PD_ProgFile(config.c))
}

// ParamsFile returns the configured parameters file path.
func (config *AnalysisConfig) ParamsFile() string {
	return C.GoString(C.PD_ParamsFile(config.c))
}

// EnableUseGpu turns on GPU execution with the given initial memory pool
// size (MB) on the given device.
// NOTE(review): C.ulong is 32 bits on some platforms (e.g. Windows); confirm
// it can hold memory_pool_init_size_mb, which is declared uint64.
func (config *AnalysisConfig) EnableUseGpu(memory_pool_init_size_mb uint64, device_id int) {
	C.PD_EnableUseGpu(config.c, C.ulong(memory_pool_init_size_mb), C.int(device_id))
}

// DisableGpu forces CPU execution.
func (config *AnalysisConfig) DisableGpu() {
	C.PD_DisableGpu(config.c)
}

// UseGpu reports whether GPU execution is enabled.
func (config *AnalysisConfig) UseGpu() bool {
	return ConvertCBooleanToGo(C.PD_UseGpu(config.c))
}

// GpuDeviceId returns the selected GPU device id.
func (config *AnalysisConfig) GpuDeviceId() int {
	return int(C.PD_GpuDeviceId(config.c))
}

// MemoryPoolInitSizeMb returns the initial GPU memory pool size in MB.
func (config *AnalysisConfig) MemoryPoolInitSizeMb() int {
	return int(C.PD_MemoryPoolInitSizeMb(config.c))
}

// EnableCudnn turns on cuDNN acceleration.
func (config *AnalysisConfig) EnableCudnn() {
	C.PD_EnableCUDNN(config.c)
}

// CudnnEnabled reports whether cuDNN is enabled.
func (config *AnalysisConfig) CudnnEnabled() bool {
	return ConvertCBooleanToGo(C.PD_CudnnEnabled(config.c))
}

// SwitchIrOptim toggles IR graph optimization.
func (config *AnalysisConfig) SwitchIrOptim(x bool) {
	C.PD_SwitchIrOptim(config.c, C.bool(x))
}

// IrOptim reports whether IR graph optimization is enabled.
func (config *AnalysisConfig) IrOptim() bool {
	return ConvertCBooleanToGo(C.PD_IrOptim(config.c))
}

// SwitchUseFeedFetchOps toggles feed/fetch ops; must be false for the
// zero-copy input/output path.
func (config *AnalysisConfig) SwitchUseFeedFetchOps(x bool) {
	C.PD_SwitchUseFeedFetchOps(config.c, C.bool(x))
}

// UseFeedFetchOpsEnabled reports whether feed/fetch ops are enabled.
func (config *AnalysisConfig) UseFeedFetchOpsEnabled() bool {
	return ConvertCBooleanToGo(C.PD_UseFeedFetchOpsEnabled(config.c))
}

// SwitchSpecifyInputNames toggles addressing inputs by name.
func (config *AnalysisConfig) SwitchSpecifyInputNames(x bool) {
	C.PD_SwitchSpecifyInputNames(config.c, C.bool(x))
}

// SpecifyInputName reports whether inputs are addressed by name.
func (config *AnalysisConfig) SpecifyInputName() bool {
	return ConvertCBooleanToGo(C.PD_SpecifyInputName(config.c))
}

//func (config *AnalysisConfig) EnableTensorRtEngine(workspace_size int)

// TensorrtEngineEnabled reports whether the TensorRT engine is enabled.
func (config *AnalysisConfig) TensorrtEngineEnabled() bool {
	return ConvertCBooleanToGo(C.PD_TensorrtEngineEnabled(config.c))
}

// SwitchIrDebug toggles IR debugging output.
func (config *AnalysisConfig) SwitchIrDebug(x bool) {
	C.PD_SwitchIrDebug(config.c, C.bool(x))
}

// EnableNgraph turns on the nGraph backend.
func (config *AnalysisConfig) EnableNgraph() {
	C.PD_EnableNgraph(config.c)
}

// NgraphEnabled reports whether the nGraph backend is enabled.
func (config *AnalysisConfig) NgraphEnabled() bool {
	return ConvertCBooleanToGo(C.PD_NgraphEnabled(config.c))
}

// EnableMkldnn turns on the MKL-DNN backend.
func (config *AnalysisConfig) EnableMkldnn() {
	C.PD_EnableMKLDNN(config.c)
}

// SetCpuMathLibraryNumThreads sets the CPU math library thread count.
func (config *AnalysisConfig) SetCpuMathLibraryNumThreads(n int) {
	C.PD_SetCpuMathLibraryNumThreads(config.c, C.int(n))
}

// CpuMathLibraryNumThreads returns the CPU math library thread count.
func (config *AnalysisConfig) CpuMathLibraryNumThreads() int {
	return int(C.PD_CpuMathLibraryNumThreads(config.c))
}

// EnableMkldnnQuantizer turns on the MKL-DNN quantizer.
func (config *AnalysisConfig) EnableMkldnnQuantizer() {
	C.PD_EnableMkldnnQuantizer(config.c)
}

// MkldnnQuantizerEnabled reports whether the MKL-DNN quantizer is enabled.
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
	return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}

// SetModelBuffer
// ModelFromMemory

// EnableMemoryOptim turns on memory optimization.
func (config *AnalysisConfig) EnableMemoryOptim() {
	C.PD_EnableMemoryOptim(config.c)
}

// MemoryOptimEnabled reports whether memory optimization is enabled.
func (config *AnalysisConfig) MemoryOptimEnabled() bool {
	return ConvertCBooleanToGo(C.PD_MemoryOptimEnabled(config.c))
}

// EnableProfile turns on profiling.
func (config *AnalysisConfig) EnableProfile() {
	C.PD_EnableProfile(config.c)
}

// ProfileEnabled reports whether profiling is enabled.
func (config *AnalysisConfig) ProfileEnabled() bool {
	return ConvertCBooleanToGo(C.PD_ProfileEnabled(config.c))
}

// DisableGlogInfo silences glog INFO output.
func (config *AnalysisConfig) DisableGlogInfo() {
	C.PD_DisableGlogInfo(config.c)
}

// DeletePass removes the named pass from the optimization pipeline.
func (config *AnalysisConfig) DeletePass(pass string) {
	c_pass := C.CString(pass)
	defer C.free(unsafe.Pointer(c_pass))
	C.PD_DeletePass(config.c, c_pass)
}

// SetInValid marks the config as invalid.
func (config *AnalysisConfig) SetInValid() {
	C.PD_SetInValid(config.c)
}

// IsValid reports whether the config is valid.
func (config *AnalysisConfig) IsValid() bool {
	return ConvertCBooleanToGo(C.PD_IsValid(config.c))
}

@ -0,0 +1,115 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package paddle
// #cgo CFLAGS: -Ipaddle_c/paddle/include
// #cgo LDFLAGS: -Lpaddle_c/paddle/lib -lpaddle_fluid_c
// #include <stdbool.h>
// #include "paddle_c_api.h"
import "C"
import "reflect"
import "runtime"
import "unsafe"
// Predictor wraps the C-side PD_Predictor and runs zero-copy inference.
type Predictor struct {
	c *C.PD_Predictor
}

// NewPredictor builds a predictor from the given config; a finalizer
// releases the C object when the Go wrapper is garbage collected.
func NewPredictor(config *AnalysisConfig) *Predictor {
	c_predictor := C.PD_NewPredictor((*config).c)
	predictor := &Predictor{c: c_predictor}
	runtime.SetFinalizer(predictor, (*Predictor).finalize)
	return predictor
}

// finalize frees the underlying C predictor object.
func (predictor *Predictor) finalize() {
	C.PD_DeletePredictor(predictor.c)
}

// DeletePredictor frees the underlying C predictor explicitly.
// NOTE(review): the finalizer set in NewPredictor will also run
// PD_DeletePredictor later — confirm double-deletion is safe.
func DeletePredictor(predictor *Predictor) {
	C.PD_DeletePredictor(predictor.c)
}

// GetInputNum returns the number of model inputs.
func (predictor *Predictor) GetInputNum() int {
	return int(C.PD_GetInputNum(predictor.c))
}

// GetOutputNum returns the number of model outputs.
func (predictor *Predictor) GetOutputNum() int {
	return int(C.PD_GetOutputNum(predictor.c))
}

// GetInputName returns the name of the n-th input.
func (predictor *Predictor) GetInputName(n int) string {
	return C.GoString(C.PD_GetInputName(predictor.c, C.int(n)))
}

// GetOutputName returns the name of the n-th output.
func (predictor *Predictor) GetOutputName(n int) string {
	return C.GoString(C.PD_GetOutputName(predictor.c, C.int(n)))
}

// GetInputTensors returns one freshly allocated zero-copy tensor per model
// input, each pre-named so it can be passed to SetZeroCopyInput.
func (predictor *Predictor) GetInputTensors() [](*ZeroCopyTensor) {
	var result [](*ZeroCopyTensor)
	for i := 0; i < predictor.GetInputNum(); i++ {
		tensor := NewZeroCopyTensor()
		// The C string returned by PD_GetInputName is stored as-is;
		// presumably it stays valid for the predictor's lifetime — TODO confirm.
		tensor.c.name = C.PD_GetInputName(predictor.c, C.int(i))
		result = append(result, tensor)
	}
	return result
}

// GetOutputTensors returns one freshly allocated zero-copy tensor per model
// output, each pre-named so it can be passed to GetZeroCopyOutput.
func (predictor *Predictor) GetOutputTensors() [](*ZeroCopyTensor) {
	var result [](*ZeroCopyTensor)
	for i := 0; i < predictor.GetOutputNum(); i++ {
		tensor := NewZeroCopyTensor()
		tensor.c.name = C.PD_GetOutputName(predictor.c, C.int(i))
		result = append(result, tensor)
	}
	return result
}

// GetInputNames returns all input names in order.
func (predictor *Predictor) GetInputNames() []string {
	names := make([]string, predictor.GetInputNum())
	for i := 0; i < len(names); i++ {
		names[i] = predictor.GetInputName(i)
	}
	return names
}
// GetOutputNames returns all output names in order.
// Fix: the original sized the slice with GetInputNum(), so models with a
// different number of inputs and outputs got a truncated or padded result.
func (predictor *Predictor) GetOutputNames() []string {
	names := make([]string, predictor.GetOutputNum())
	for i := 0; i < len(names); i++ {
		names[i] = predictor.GetOutputName(i)
	}
	return names
}
// SetZeroCopyInput hands the tensor's buffer to the predictor as an input.
func (predictor *Predictor) SetZeroCopyInput(tensor *ZeroCopyTensor) {
	C.PD_SetZeroCopyInput(predictor.c, tensor.c)
}

// GetZeroCopyOutput fills the tensor from the predictor's output, then
// mirrors the C-side name and shape back into the Go wrapper.
func (predictor *Predictor) GetZeroCopyOutput(tensor *ZeroCopyTensor) {
	C.PD_GetZeroCopyOutput(predictor.c, tensor.c)
	tensor.name = C.GoString(tensor.c.name)
	// Build an []int32 view aliasing the C-owned shape buffer (no copy);
	// shape.length is in bytes, hence the division by sizeof(int).
	var shape []int32
	shape_hdr := (*reflect.SliceHeader)(unsafe.Pointer(&shape))
	shape_hdr.Data = uintptr(unsafe.Pointer(tensor.c.shape.data))
	shape_hdr.Len = int(tensor.c.shape.length / C.sizeof_int)
	shape_hdr.Cap = int(tensor.c.shape.length / C.sizeof_int)
	tensor.Reshape(shape)
}

// ZeroCopyRun executes the model with the inputs previously set.
func (predictor *Predictor) ZeroCopyRun() {
	C.PD_ZeroCopyRun(predictor.c)
}

File diff suppressed because it is too large Load Diff

@ -30,6 +30,8 @@ int PaddleDtypeSize(PaddleDType dtype) {
return sizeof(int64_t);
case PaddleDType::INT32:
return sizeof(int32_t);
case PaddleDType::UINT8:
return sizeof(uint8_t);
default:
assert(false);
return -1;

@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/capi/c_api.h"
#include <algorithm>
#include <vector>
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
using paddle::ConvertToPDDataType;
using paddle::ConvertToACPrecision;
extern "C" {

@ -17,6 +17,7 @@
#include <memory>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/platform/enforce.h"
using PD_PaddleDType = paddle::PaddleDType;
@ -34,10 +35,14 @@ struct PD_PaddleBuf {
paddle::PaddleBuf buf;
};
struct PD_Predictor {
std::unique_ptr<paddle::PaddlePredictor> predictor;
};
namespace paddle {
paddle::PaddleDType ConvertToPaddleDType(PD_DataType dtype);
PD_DataType ConvertToPDDataType(PD_PaddleDType dtype);
PD_ACPrecision ConvertToACPrecision(Precision dtype);
}
} // namespace paddle

@ -33,8 +33,32 @@ extern "C" {
#endif
enum PD_DataType { PD_FLOAT32, PD_INT32, PD_INT64, PD_UINT8, PD_UNKDTYPE };
typedef enum PD_DataType PD_DataType;
typedef struct PD_PaddleBuf PD_PaddleBuf;
typedef struct PD_AnalysisConfig PD_AnalysisConfig;
typedef struct PD_Predictor PD_Predictor;
typedef struct PD_Buffer {
void* data;
size_t length;
size_t capacity;
} PD_Buffer;
typedef struct PD_ZeroCopyTensor {
PD_Buffer data;
PD_Buffer shape;
PD_Buffer lod;
PD_DataType dtype;
char* name;
} PD_ZeroCopyTensor;
PADDLE_CAPI_EXPORT extern PD_ZeroCopyTensor* PD_NewZeroCopyTensor();
PADDLE_CAPI_EXPORT extern void PD_DeleteZeroCopyTensor(PD_ZeroCopyTensor*);
PADDLE_CAPI_EXPORT extern void PD_InitZeroCopyTensor(PD_ZeroCopyTensor*);
PADDLE_CAPI_EXPORT extern void PD_DestroyZeroCopyTensor(PD_ZeroCopyTensor*);
PADDLE_CAPI_EXPORT extern void PD_DeleteZeroCopyTensor(PD_ZeroCopyTensor*);
typedef struct PD_ZeroCopyData {
char* name;
@ -108,6 +132,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun(
// AnalysisConfig
enum Precision { kFloat32 = 0, kInt8, kHalf };
typedef enum Precision Precision;
PADDLE_CAPI_EXPORT extern PD_AnalysisConfig* PD_NewAnalysisConfig();
@ -116,7 +141,7 @@ PADDLE_CAPI_EXPORT extern void PD_DeleteAnalysisConfig(
PADDLE_CAPI_EXPORT extern void PD_SetModel(PD_AnalysisConfig* config,
const char* model_dir,
const char* params_path = NULL);
const char* params_path);
PADDLE_CAPI_EXPORT
extern void PD_SetProgFile(PD_AnalysisConfig* config, const char* x);
@ -138,7 +163,7 @@ PADDLE_CAPI_EXPORT extern const char* PD_ParamsFile(
PADDLE_CAPI_EXPORT extern void PD_EnableUseGpu(
PD_AnalysisConfig* config, uint64_t memory_pool_init_size_mb,
int device_id = 0);
int device_id);
PADDLE_CAPI_EXPORT extern void PD_DisableGpu(PD_AnalysisConfig* config);
@ -157,27 +182,26 @@ PADDLE_CAPI_EXPORT extern void PD_EnableCUDNN(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_CudnnEnabled(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SwitchIrOptim(PD_AnalysisConfig* config,
bool x = true);
bool x);
PADDLE_CAPI_EXPORT extern bool PD_IrOptim(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SwitchUseFeedFetchOps(
PD_AnalysisConfig* config, bool x = true);
PD_AnalysisConfig* config, bool x);
PADDLE_CAPI_EXPORT extern bool PD_UseFeedFetchOpsEnabled(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_SwitchSpecifyInputNames(
PD_AnalysisConfig* config, bool x = true);
PD_AnalysisConfig* config, bool x);
PADDLE_CAPI_EXPORT extern bool PD_SpecifyInputName(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_EnableTensorRtEngine(
PD_AnalysisConfig* config, int workspace_size = 1 << 20,
int max_batch_size = 1, int min_subgraph_size = 3,
Precision precision = Precision::kFloat32, bool use_static = false,
bool use_calib_mode = false);
PD_AnalysisConfig* config, int workspace_size, int max_batch_size,
int min_subgraph_size, Precision precision, bool use_static,
bool use_calib_mode);
PADDLE_CAPI_EXPORT extern bool PD_TensorrtEngineEnabled(
const PD_AnalysisConfig* config);
@ -189,7 +213,7 @@ typedef struct PD_MaxInputShape {
} PD_MaxInputShape;
PADDLE_CAPI_EXPORT extern void PD_SwitchIrDebug(PD_AnalysisConfig* config,
bool x = true);
bool x);
PADDLE_CAPI_EXPORT extern void PD_EnableNgraph(PD_AnalysisConfig* config);
@ -238,6 +262,23 @@ PADDLE_CAPI_EXPORT extern bool PD_ProfileEnabled(
PADDLE_CAPI_EXPORT extern void PD_SetInValid(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern bool PD_IsValid(const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_DisableGlogInfo(PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_DeletePass(PD_AnalysisConfig* config,
char* pass_name);
PADDLE_CAPI_EXPORT extern PD_Predictor* PD_NewPredictor(
const PD_AnalysisConfig* config);
PADDLE_CAPI_EXPORT extern void PD_DeletePredictor(PD_Predictor* predictor);
PADDLE_CAPI_EXPORT extern int PD_GetInputNum(const PD_Predictor*);
PADDLE_CAPI_EXPORT extern int PD_GetOutputNum(const PD_Predictor*);
PADDLE_CAPI_EXPORT extern const char* PD_GetInputName(const PD_Predictor*, int);
PADDLE_CAPI_EXPORT extern const char* PD_GetOutputName(const PD_Predictor*,
int);
PADDLE_CAPI_EXPORT extern void PD_SetZeroCopyInput(
PD_Predictor* predictor, const PD_ZeroCopyTensor* tensor);
PADDLE_CAPI_EXPORT extern void PD_GetZeroCopyOutput(PD_Predictor* predictor,
PD_ZeroCopyTensor* tensor);
PADDLE_CAPI_EXPORT extern void PD_ZeroCopyRun(PD_Predictor* predictor);
#ifdef __cplusplus
} // extern "C"

@ -18,12 +18,12 @@
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
using paddle::ConvertToPDDataType;
using paddle::ConvertToACPrecision;
extern "C" {
@ -258,4 +258,12 @@ bool PD_IsValid(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config);
return config->config.is_valid();
}
// Silence glog INFO output for this config.
void PD_DisableGlogInfo(PD_AnalysisConfig* config) {
  // Null-check for consistency with the sibling setters in this file.
  PADDLE_ENFORCE_NOT_NULL(config);
  config->config.DisableGlogInfo();
}
// Remove the named pass from the config's optimization pipeline.
void PD_DeletePass(PD_AnalysisConfig* config, char* pass_name) {
  // Null-check for consistency with the sibling setters in this file;
  // also dropped the `return` of a void expression.
  PADDLE_ENFORCE_NOT_NULL(config);
  config->config.pass_builder()->DeletePass(std::string(pass_name));
}
} // extern "C"

@ -13,16 +13,19 @@
// limitations under the License.
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <numeric>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/api/paddle_api.h"
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
using paddle::ConvertToPDDataType;
using paddle::ConvertToACPrecision;
namespace {
#define _DataTypeHelper_(CALLBACK, CPP_TYPE, PD_TYPE) \
@ -169,4 +172,130 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
}
return true;
}
// Build a predictor from an analysis config; the caller owns the result
// and releases it with PD_DeletePredictor.
PD_Predictor* PD_NewPredictor(const PD_AnalysisConfig* config) {
  auto* result = new PD_Predictor;
  result->predictor = paddle::CreatePaddlePredictor(config->config);
  return result;
}
// Destroy a predictor created by PD_NewPredictor. Safe to call with NULL.
void PD_DeletePredictor(PD_Predictor* predictor) {
  // Fix: the original guarded with `== nullptr`, deleting only null
  // pointers and leaking every real predictor.
  if (predictor != nullptr) {
    delete predictor;
  }
}
// Number of model inputs.
int PD_GetInputNum(const PD_Predictor* predictor) {
  return static_cast<int>(predictor->predictor->GetInputNames().size());
}
// Number of model outputs.
int PD_GetOutputNum(const PD_Predictor* predictor) {
  return static_cast<int>(predictor->predictor->GetOutputNames().size());
}
// Name of the n-th input. The returned pointer stays valid for the
// program's lifetime (the names live in a function-local static).
const char* PD_GetInputName(const PD_Predictor* predictor, int n) {
  // NOTE(review): `static` caches the names of the FIRST predictor this is
  // ever called on; with multiple predictors, later calls return stale
  // names — confirm and consider per-predictor storage.
  static std::vector<std::string> names = predictor->predictor->GetInputNames();
  return names[n].c_str();
}
// Name of the n-th output. Same static-caching caveat as PD_GetInputName.
const char* PD_GetOutputName(const PD_Predictor* predictor, int n) {
  static std::vector<std::string> names =
      predictor->predictor->GetOutputNames();
  return names[n].c_str();
}
// Copy the caller-provided buffer into the named input tensor: reshape
// from tensor->shape, copy data by dtype, and install the LoD if present.
void PD_SetZeroCopyInput(PD_Predictor* predictor,
                         const PD_ZeroCopyTensor* tensor) {
  auto input = predictor->predictor->GetInputTensor(tensor->name);
  auto* shape_ptr = static_cast<int*>(tensor->shape.data);
  // shape.length is in bytes.
  std::vector<int> shape(shape_ptr,
                         shape_ptr + tensor->shape.length / sizeof(int));
  input->Reshape(std::move(shape));
  switch (tensor->dtype) {
    case PD_FLOAT32:
      input->copy_from_cpu(static_cast<float*>(tensor->data.data));
      break;
    case PD_INT32:
      input->copy_from_cpu(static_cast<int32_t*>(tensor->data.data));
      break;
    case PD_INT64:
      input->copy_from_cpu(static_cast<int64_t*>(tensor->data.data));
      break;
    case PD_UINT8:
      input->copy_from_cpu(static_cast<uint8_t*>(tensor->data.data));
      break;
    default:
      CHECK(false) << "Unsupport data type.";
      break;
  }
  if (tensor->lod.length) {
    auto* lod_ptr = reinterpret_cast<size_t*>(tensor->lod.data);
    // Fix: lod.length is a byte count (see PD_GetZeroCopyOutput), so divide
    // by the element size; the original treated it as an element count and
    // read past the buffer.
    std::vector<size_t> lod(lod_ptr,
                            lod_ptr + tensor->lod.length / sizeof(size_t));
    input->SetLoD({std::move(lod)});
  }
}
// Fetch the named output into the caller's tensor, growing the shape, data
// and lod buffers as needed. All Buffer lengths/capacities are in bytes.
void PD_GetZeroCopyOutput(PD_Predictor* predictor, PD_ZeroCopyTensor* tensor) {
  auto output = predictor->predictor->GetOutputTensor(tensor->name);
  tensor->dtype = ConvertToPDDataType(output->type());

  // Shape buffer: grow if too small, then copy the dims.
  auto shape = output->shape();
  size_t shape_size = shape.size();
  if (tensor->shape.capacity < shape_size * sizeof(int)) {
    if (tensor->shape.data || tensor->shape.capacity) {
      std::free(tensor->shape.data);
    }
    tensor->shape.data = std::malloc(shape_size * sizeof(int));
    tensor->shape.capacity = shape_size * sizeof(int);
  }
  tensor->shape.length = shape_size * sizeof(int);
  std::copy(shape.begin(), shape.end(), static_cast<int*>(tensor->shape.data));

  // Data buffer: grow to hold all elements of the output's dtype.
  int n =
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
  size_t length = n * paddle::PaddleDtypeSize(output->type());
  if (tensor->data.capacity < length) {
    if (tensor->data.data) {
      std::free(tensor->data.data);
    }
    tensor->data.data = std::malloc(length);
    tensor->data.capacity = length;  // was std::move(length): a no-op on size_t
  }
  tensor->data.length = length;

  // LoD buffer.
  auto lod = output->lod();
  if (lod.empty()) {
    // Fix: the original called lod.front() unconditionally, which is
    // undefined behavior when the output carries no LoD.
    tensor->lod.length = 0;
  } else {
    tensor->lod.length = lod.front().size() * sizeof(size_t);
    // Fix: compare capacity (bytes) against the byte length; the original
    // compared it against the element count and could under-allocate.
    if (tensor->lod.capacity < tensor->lod.length) {
      if (tensor->lod.data) {
        std::free(tensor->lod.data);
      }
      tensor->lod.data = std::malloc(lod.front().size() * sizeof(size_t));
      tensor->lod.capacity = lod.front().size() * sizeof(size_t);
    }
    std::copy(lod.front().begin(), lod.front().end(),
              reinterpret_cast<size_t*>(tensor->lod.data));
  }

  switch (tensor->dtype) {
    case PD_FLOAT32:
      output->copy_to_cpu(reinterpret_cast<float*>(tensor->data.data));
      break;
    case PD_INT32:
      output->copy_to_cpu(reinterpret_cast<int32_t*>(tensor->data.data));
      break;
    case PD_INT64:
      output->copy_to_cpu(reinterpret_cast<int64_t*>(tensor->data.data));
      break;
    case PD_UINT8:
      output->copy_to_cpu(reinterpret_cast<uint8_t*>(tensor->data.data));
      break;
    default:
      CHECK(false) << "Unsupport data type.";
      break;
  }
}
// Execute the model with the inputs previously set via PD_SetZeroCopyInput.
void PD_ZeroCopyRun(PD_Predictor* predictor) {
  predictor->predictor->ZeroCopyRun();
}
} // extern "C"

@ -13,13 +13,16 @@
// limitations under the License.
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
using paddle::ConvertToACPrecision;
using paddle::ConvertToPaddleDType;
using paddle::ConvertToPDDataType;
using paddle::ConvertToACPrecision;
extern "C" {
// PaddleTensor
@ -78,4 +81,35 @@ int* PD_GetPaddleTensorShape(const PD_Tensor* tensor, int** size) {
return shape.data();
}
// Allocate a zero-copy tensor with every field zero-initialized; release
// it with PD_DeleteZeroCopyTensor.
PD_ZeroCopyTensor* PD_NewZeroCopyTensor() {
  PD_ZeroCopyTensor* tensor = new PD_ZeroCopyTensor;
  PD_InitZeroCopyTensor(tensor);
  return tensor;
}
// Free the tensor's buffers and the tensor itself. Safe to call with NULL.
void PD_DeleteZeroCopyTensor(PD_ZeroCopyTensor* tensor) {
  if (tensor) {
    PD_DestroyZeroCopyTensor(tensor);
    delete tensor;
  }
  // Dropped the trailing `tensor = nullptr;` — it only reset the local
  // parameter copy and had no effect on the caller.
}
// Zero every field of the tensor (null buffers, zero lengths/capacities)
// so the growth logic in PD_GetZeroCopyOutput starts from a clean state.
void PD_InitZeroCopyTensor(PD_ZeroCopyTensor* tensor) {
  std::memset(tensor, 0, sizeof(PD_ZeroCopyTensor));
}
// Release the tensor's data, shape and lod buffers without freeing the
// tensor struct itself. Also resets each buffer's length and capacity so
// the tensor can be reused (the original left stale sizes behind, lying
// about storage that had just been freed).
void PD_DestroyZeroCopyTensor(PD_ZeroCopyTensor* tensor) {
#define __PADDLE_INFER_CAPI_RESET_BUF(__buf) \
  if ((__buf).data) {                        \
    std::free((__buf).data);                 \
    (__buf).data = nullptr;                  \
  }                                          \
  (__buf).length = 0;                        \
  (__buf).capacity = 0;
  __PADDLE_INFER_CAPI_RESET_BUF(tensor->data);
  __PADDLE_INFER_CAPI_RESET_BUF(tensor->shape);
  __PADDLE_INFER_CAPI_RESET_BUF(tensor->lod);
#undef __PADDLE_INFER_CAPI_RESET_BUF
}
} // extern "C"

@ -19,7 +19,7 @@ limitations under the License. */
#include <iostream>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
@ -37,7 +37,7 @@ TEST(PD_AnalysisConfig, use_gpu) {
PD_SwitchUseFeedFetchOps(config, false);
PD_SwitchSpecifyInputNames(config, true);
PD_SwitchIrDebug(config, true);
PD_SetModel(config, model_dir.c_str());
PD_SetModel(config, model_dir.c_str(), nullptr);
PD_SetOptimCacheDir(config, (FLAGS_infer_model + "/OptimCacheDir").c_str());
const char *model_dir_ = PD_ModelDir(config);
LOG(INFO) << model_dir_;
@ -56,7 +56,8 @@ TEST(PD_AnalysisConfig, use_gpu) {
PD_SwitchIrOptim(config, true);
bool ir_optim = PD_IrOptim(config);
CHECK(ir_optim) << "NO";
PD_EnableTensorRtEngine(config);
PD_EnableTensorRtEngine(config, 1 << 20, 1, 3, Precision::kFloat32, false,
false);
bool trt_enable = PD_TensorrtEngineEnabled(config);
CHECK(trt_enable) << "NO";
PD_EnableNgraph(config);

@ -19,7 +19,7 @@ limitations under the License. */
#include <iostream>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
@ -34,7 +34,7 @@ void zero_copy_run() {
PD_SwitchUseFeedFetchOps(config, false);
PD_SwitchSpecifyInputNames(config, true);
PD_SwitchIrDebug(config, true);
PD_SetModel(config, model_dir.c_str()); //, params_file1.c_str());
PD_SetModel(config, model_dir.c_str(), nullptr);
bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
CHECK(!use_feed_fetch) << "NO";
bool specify_input_names = PD_SpecifyInputName(config);

@ -20,8 +20,8 @@ limitations under the License. */
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/capi/c_api_internal.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {

@ -19,7 +19,7 @@ limitations under the License. */
#include <iostream>
#include <string>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/capi/paddle_c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {

Loading…
Cancel
Save