Add a c-api interface to initialize the thread environment of Paddle (#5773)
* Fix bug in MergeModel.cpp. * Add a C-API interface to initialize the thread environment of Paddle and add a GPU example. * Add some notes for paddle_init_thread and move the implementation of paddle_error_string into a .cpp file. * Add some comments; delete some entries in the make list.
parent
36fcc95cab
commit
00b64f6679
@ -0,0 +1,32 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include "error.h"
|
||||||
|
|
||||||
|
/*
 * Map a paddle_error code to a short human-readable description.
 * Returns a pointer to a static string literal; the caller must not
 * free or modify it. Any code without a known description (including
 * success) maps to the empty string.
 */
const char* paddle_error_string(paddle_error err) {
  if (err == kPD_NULLPTR) {
    return "nullptr error";
  }
  if (err == kPD_OUT_OF_RANGE) {
    return "out of range error";
  }
  if (err == kPD_PROTOBUF_ERROR) {
    return "protobuf error";
  }
  if (err == kPD_NOT_SUPPORTED) {
    return "not supported error";
  }
  if (err == kPD_UNDEFINED_ERROR) {
    return "undefined error";
  }
  return "";
}
|
@ -1,8 +1,29 @@
|
|||||||
# Build the multi-thread Paddle C-API inference examples.
# The CPU binary is always built; a GPU binary is added when CUDA is found.

# cmake_minimum_required must be called before project() so policy
# settings are established first.
cmake_minimum_required(VERSION 2.8)
project(multi_thread)

find_package(Threads)

# Locate the Paddle installation; it may be supplied either via
# -DPADDLE_ROOT=... or through the PADDLE_ROOT environment variable.
if(NOT PADDLE_ROOT)
  set(PADDLE_ROOT $ENV{PADDLE_ROOT} CACHE PATH "Paddle Path")
endif()
if(PADDLE_ROOT)
  include_directories(${PADDLE_ROOT}/include)
  link_directories(${PADDLE_ROOT}/lib)
endif()

# CPU example.
set(CPU_SRCS main.c)
add_executable(${PROJECT_NAME} ${CPU_SRCS})
set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99)
# Use the plain library name so CMake resolves it through link_directories
# instead of forwarding a raw -l flag to the linker.
target_link_libraries(${PROJECT_NAME}
                      paddle_capi_shared
                      ${CMAKE_THREAD_LIBS_INIT})

# GPU example, built only when a CUDA toolkit is available.
find_package(CUDA QUIET)
if(CUDA_FOUND)
  set(GPU_SRCS main_gpu.c)
  cuda_add_executable(${PROJECT_NAME}_gpu ${GPU_SRCS})
  set_property(TARGET ${PROJECT_NAME}_gpu PROPERTY C_STANDARD 99)
  target_link_libraries(${PROJECT_NAME}_gpu
                        paddle_capi_shared
                        ${CMAKE_THREAD_LIBS_INIT})
endif(CUDA_FOUND)
|
@ -0,0 +1,113 @@
|
|||||||
|
#include <paddle/capi.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "../common/common.h"
|
||||||
|
|
||||||
|
#define CONFIG_BIN "./trainer_config.bin"
|
||||||
|
#define NUM_THREAD 4
|
||||||
|
#define NUM_ITER 1000
|
||||||
|
|
||||||
|
pthread_mutex_t mutex;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @brief It is an simple inference example that runs multi-threads on a GPU.
|
||||||
|
* Each thread holds it own local gradient_machine but shares the same
|
||||||
|
* parameters.
|
||||||
|
* If you want to run on different GPUs, you need to launch
|
||||||
|
* multi-processes or set trainer_count > 1.
|
||||||
|
*/
|
||||||
|
void* thread_main(void* gm_ptr) {
|
||||||
|
// Initialize the thread environment of Paddle.
|
||||||
|
CHECK(paddle_init_thread());
|
||||||
|
|
||||||
|
paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr);
|
||||||
|
// Create input arguments.
|
||||||
|
paddle_arguments in_args = paddle_arguments_create_none();
|
||||||
|
// Create input matrix.
|
||||||
|
paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
|
||||||
|
/* size */ 784,
|
||||||
|
/* useGPU */ true);
|
||||||
|
// Create output arguments.
|
||||||
|
paddle_arguments out_args = paddle_arguments_create_none();
|
||||||
|
// Create output matrix.
|
||||||
|
paddle_matrix prob = paddle_matrix_create_none();
|
||||||
|
|
||||||
|
// CPU buffer to cache the input and output.
|
||||||
|
paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real));
|
||||||
|
paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real));
|
||||||
|
for (int iter = 0; iter < NUM_ITER; ++iter) {
|
||||||
|
// There is only one input layer of this network.
|
||||||
|
CHECK(paddle_arguments_resize(in_args, 1));
|
||||||
|
CHECK(paddle_arguments_set_value(in_args, 0, mat));
|
||||||
|
|
||||||
|
for (int i = 0; i < 784; ++i) {
|
||||||
|
cpu_input[i] = rand() / ((float)RAND_MAX);
|
||||||
|
}
|
||||||
|
CHECK(paddle_matrix_set_value(mat, cpu_input));
|
||||||
|
|
||||||
|
CHECK(paddle_gradient_machine_forward(machine,
|
||||||
|
in_args,
|
||||||
|
out_args,
|
||||||
|
/* isTrain */ false));
|
||||||
|
|
||||||
|
CHECK(paddle_arguments_get_value(out_args, 0, prob));
|
||||||
|
CHECK(paddle_matrix_get_value(prob, cpu_output));
|
||||||
|
|
||||||
|
pthread_mutex_lock(&mutex);
|
||||||
|
printf("Prob: ");
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
printf("%.2f ", cpu_output[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
pthread_mutex_unlock(&mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK(paddle_matrix_destroy(prob));
|
||||||
|
CHECK(paddle_arguments_destroy(out_args));
|
||||||
|
CHECK(paddle_matrix_destroy(mat));
|
||||||
|
CHECK(paddle_arguments_destroy(in_args));
|
||||||
|
CHECK(paddle_gradient_machine_destroy(machine));
|
||||||
|
|
||||||
|
free(cpu_input);
|
||||||
|
free(cpu_output);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
// Initalize Paddle
|
||||||
|
char* argv[] = {"--use_gpu=True"};
|
||||||
|
CHECK(paddle_init(1, (char**)argv));
|
||||||
|
|
||||||
|
// Reading config binary file. It is generated by `convert_protobin.sh`
|
||||||
|
long size;
|
||||||
|
void* buf = read_config(CONFIG_BIN, &size);
|
||||||
|
|
||||||
|
// Create a gradient machine for inference.
|
||||||
|
paddle_gradient_machine machine;
|
||||||
|
CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
|
||||||
|
CHECK(paddle_gradient_machine_randomize_param(machine));
|
||||||
|
|
||||||
|
// Loading parameter. Uncomment the following line and change the directory.
|
||||||
|
// CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
|
||||||
|
// "./some_where_to_params"));
|
||||||
|
srand(time(0));
|
||||||
|
pthread_mutex_init(&mutex, NULL);
|
||||||
|
|
||||||
|
pthread_t threads[NUM_THREAD];
|
||||||
|
|
||||||
|
for (int i = 0; i < NUM_THREAD; ++i) {
|
||||||
|
paddle_gradient_machine thread_local_machine;
|
||||||
|
CHECK(paddle_gradient_machine_create_shared_param(
|
||||||
|
machine, buf, size, &thread_local_machine));
|
||||||
|
pthread_create(&threads[i], NULL, thread_main, thread_local_machine);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < NUM_THREAD; ++i) {
|
||||||
|
pthread_join(threads[i], NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_destroy(&mutex);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in new issue