Add a c-api interface to initialize the thread environment of Paddle (#5773)
* Fix bug in MergeModel.cpp.
* Add a c-api interface to initialize the thread environment of Paddle and add a GPU example.
* Add some notes for paddle_init_thread and move the implementation of paddle_error_string into a .cpp file.
* Add some comments.
* del_some_in_makelist
parent 36fcc95cab
commit 00b64f6679
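In sketch form, the contract the new interface introduces looks like the following (a minimal, hypothetical worker; the real example is main_gpu.c in the diff below):

#include <paddle/capi.h>
#include <stdio.h>

/* Each worker thread initializes its own Paddle thread environment
 * before making any other C-API call, as main_gpu.c below does. */
void* worker(void* shared_machine) {
  (void)shared_machine;
  paddle_error err = paddle_init_thread();
  if (err != kPD_NO_ERROR) {
    fprintf(stderr, "paddle_init_thread: %s\n", paddle_error_string(err));
    return NULL;
  }
  /* ... per-thread inference using a gradient machine that shares
   * parameters with the main machine ... */
  return NULL;
}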
paddle/capi/error.cpp
@@ -0,0 +1,32 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "error.h"
+
+const char* paddle_error_string(paddle_error err) {
+  switch (err) {
+    case kPD_NULLPTR:
+      return "nullptr error";
+    case kPD_OUT_OF_RANGE:
+      return "out of range error";
+    case kPD_PROTOBUF_ERROR:
+      return "protobuf error";
+    case kPD_NOT_SUPPORTED:
+      return "not supported error";
+    case kPD_UNDEFINED_ERROR:
+      return "undefined error";
+    default:
+      return "";
+  }
+}
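The string form pairs naturally with the CHECK macro the C-API examples use; a minimal sketch of such a macro, assuming a failed call should abort the process (the real helper lives in the examples' common.h), might be:

#include <stdio.h>
#include <stdlib.h>
#include <paddle/capi.h>

/* Hypothetical CHECK: abort with a readable message when a C-API
 * call returns anything other than kPD_NO_ERROR. */
#define CHECK(stmt)                                                  \
  do {                                                               \
    paddle_error __err = (stmt);                                     \
    if (__err != kPD_NO_ERROR) {                                     \
      fprintf(stderr, "%s:%d: %s failed: %s\n", __FILE__, __LINE__,  \
              #stmt, paddle_error_string(__err));                    \
      exit(EXIT_FAILURE);                                            \
    }                                                                \
  } while (0)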
paddle/capi/examples/model_inference/multi_thread/CMakeLists.txt
@@ -1,8 +1,29 @@
 project(multi_thread)
 cmake_minimum_required(VERSION 2.8)
-aux_source_directory(. SRC_LIST)
-add_executable(${PROJECT_NAME} ${SRC_LIST})
 
 find_package (Threads)
 
+if(NOT PADDLE_ROOT)
+  set(PADDLE_ROOT $ENV{PADDLE_ROOT} CACHE PATH "Paddle Path")
+endif()
+if(PADDLE_ROOT)
+  include_directories(${PADDLE_ROOT}/include)
+  link_directories(${PADDLE_ROOT}/lib)
+endif()
+
+set(CPU_SRCS main.c)
+add_executable(${PROJECT_NAME} ${CPU_SRCS})
+set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99)
-target_link_libraries(${PROJECT_NAME} -lpaddle_capi_shared
+target_link_libraries(${PROJECT_NAME}
+                      -lpaddle_capi_shared
                       ${CMAKE_THREAD_LIBS_INIT})
+
+find_package(CUDA QUIET)
+if(CUDA_FOUND)
+  set(GPU_SRCS main_gpu.c)
+  cuda_add_executable(${PROJECT_NAME}_gpu ${GPU_SRCS})
+  set_property(TARGET ${PROJECT_NAME}_gpu PROPERTY C_STANDARD 99)
+  target_link_libraries(${PROJECT_NAME}_gpu
+                        -lpaddle_capi_shared
+                        ${CMAKE_THREAD_LIBS_INIT})
+endif(CUDA_FOUND)
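With `PADDLE_ROOT` pointing at an installed Paddle (headers under `include/`, libraries under `lib/`), a typical out-of-source build would be something like `cmake -DPADDLE_ROOT=/path/to/paddle ..` followed by `make`; the `multi_thread_gpu` target is only generated when CMake can find CUDA, so the example still builds on CPU-only machines.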
paddle/capi/examples/model_inference/multi_thread/main_gpu.c
@@ -0,0 +1,113 @@
+#include <paddle/capi.h>
+#include <pthread.h>
+#include <time.h>
+#include "../common/common.h"
+
+#define CONFIG_BIN "./trainer_config.bin"
+#define NUM_THREAD 4
+#define NUM_ITER 1000
+
+pthread_mutex_t mutex;
+
+/*
+ * @brief A simple inference example that runs multiple threads on one GPU.
+ *        Each thread holds its own local gradient_machine but shares the
+ *        same parameters.
+ *        If you want to run on different GPUs, you need to launch
+ *        multiple processes or set trainer_count > 1.
+ */
+void* thread_main(void* gm_ptr) {
+  // Initialize the thread environment of Paddle.
+  CHECK(paddle_init_thread());
+
+  paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr);
+  // Create input arguments.
+  paddle_arguments in_args = paddle_arguments_create_none();
+  // Create input matrix.
+  paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
+                                           /* size */ 784,
+                                           /* useGPU */ true);
+  // Create output arguments.
+  paddle_arguments out_args = paddle_arguments_create_none();
+  // Create output matrix.
+  paddle_matrix prob = paddle_matrix_create_none();
+
+  // CPU buffers to cache the input and output.
+  paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real));
+  paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real));
+  for (int iter = 0; iter < NUM_ITER; ++iter) {
+    // There is only one input layer of this network.
+    CHECK(paddle_arguments_resize(in_args, 1));
+    CHECK(paddle_arguments_set_value(in_args, 0, mat));
+
+    for (int i = 0; i < 784; ++i) {
+      cpu_input[i] = rand() / ((float)RAND_MAX);
+    }
+    CHECK(paddle_matrix_set_value(mat, cpu_input));
+
+    CHECK(paddle_gradient_machine_forward(machine,
+                                          in_args,
+                                          out_args,
+                                          /* isTrain */ false));
+
+    CHECK(paddle_arguments_get_value(out_args, 0, prob));
+    CHECK(paddle_matrix_get_value(prob, cpu_output));
+
+    pthread_mutex_lock(&mutex);
+    printf("Prob: ");
+    for (int i = 0; i < 10; ++i) {
+      printf("%.2f ", cpu_output[i]);
+    }
+    printf("\n");
+    pthread_mutex_unlock(&mutex);
+  }
+
+  CHECK(paddle_matrix_destroy(prob));
+  CHECK(paddle_arguments_destroy(out_args));
+  CHECK(paddle_matrix_destroy(mat));
+  CHECK(paddle_arguments_destroy(in_args));
+  CHECK(paddle_gradient_machine_destroy(machine));
+
+  free(cpu_input);
+  free(cpu_output);
+
+  return NULL;
+}
+
+int main() {
+  // Initialize Paddle.
+  char* argv[] = {"--use_gpu=True"};
+  CHECK(paddle_init(1, (char**)argv));
+
+  // Read the config binary file. It is generated by `convert_protobin.sh`.
+  long size;
+  void* buf = read_config(CONFIG_BIN, &size);
+
+  // Create a gradient machine for inference.
+  paddle_gradient_machine machine;
+  CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
+  CHECK(paddle_gradient_machine_randomize_param(machine));
+
+  // Load the parameters. Uncomment the following lines and change the path.
+  // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
+  //                                                        "./some_where_to_params"));
+  srand(time(0));
+  pthread_mutex_init(&mutex, NULL);
+
+  pthread_t threads[NUM_THREAD];
+
+  for (int i = 0; i < NUM_THREAD; ++i) {
+    paddle_gradient_machine thread_local_machine;
+    CHECK(paddle_gradient_machine_create_shared_param(
+        machine, buf, (int)size, &thread_local_machine));
+    pthread_create(&threads[i], NULL, thread_main, thread_local_machine);
+  }
+
+  for (int i = 0; i < NUM_THREAD; ++i) {
+    pthread_join(threads[i], NULL);
+  }
+
+  pthread_mutex_destroy(&mutex);
+
+  return 0;
+}
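main_gpu.c also leans on `read_config` from the shared common.h helper to slurp the serialized network config; a plausible sketch of that helper (an assumption, not necessarily the exact implementation) is:

#include <stdio.h>
#include <stdlib.h>

/* Plausible read_config: load an entire binary file into a malloc'd
 * buffer and report its length through *size. */
void* read_config(const char* filename, long* size) {
  FILE* file = fopen(filename, "rb");
  if (file == NULL) {
    fprintf(stderr, "Open %s error\n", filename);
    return NULL;
  }
  fseek(file, 0L, SEEK_END);
  *size = ftell(file);
  fseek(file, 0L, SEEK_SET);
  void* buf = malloc((size_t)(*size));
  if (fread(buf, 1, (size_t)(*size), file) != (size_t)(*size)) {
    fprintf(stderr, "Read %s error\n", filename);
  }
  fclose(file);
  return buf;
}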