You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/capi/examples/model_inference/multi_thread/main_gpu.c

114 lines
3.6 KiB

#include <paddle/capi.h>
#include <pthread.h>
#include <time.h>
#include "../common/common.h"
#define CONFIG_BIN "./trainer_config.bin"
#define NUM_THREAD 4
#define NUM_ITER 1000
pthread_mutex_t mutex;
/*
* @brief It is an simple inference example that runs multi-threads on a GPU.
* Each thread holds it own local gradient_machine but shares the same
* parameters.
* If you want to run on different GPUs, you need to launch
* multi-processes or set trainer_count > 1.
*/
void* thread_main(void* gm_ptr) {
// Initialize the thread environment of Paddle.
CHECK(paddle_init_thread());
paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr);
// Create input arguments.
paddle_arguments in_args = paddle_arguments_create_none();
// Create input matrix.
paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
/* size */ 784,
/* useGPU */ true);
// Create output arguments.
paddle_arguments out_args = paddle_arguments_create_none();
// Create output matrix.
paddle_matrix prob = paddle_matrix_create_none();
// CPU buffer to cache the input and output.
paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real));
paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real));
for (int iter = 0; iter < NUM_ITER; ++iter) {
// There is only one input layer of this network.
CHECK(paddle_arguments_resize(in_args, 1));
CHECK(paddle_arguments_set_value(in_args, 0, mat));
for (int i = 0; i < 784; ++i) {
cpu_input[i] = rand() / ((float)RAND_MAX);
}
CHECK(paddle_matrix_set_value(mat, cpu_input));
CHECK(paddle_gradient_machine_forward(machine,
in_args,
out_args,
/* isTrain */ false));
CHECK(paddle_arguments_get_value(out_args, 0, prob));
CHECK(paddle_matrix_get_value(prob, cpu_output));
pthread_mutex_lock(&mutex);
printf("Prob: ");
for (int i = 0; i < 10; ++i) {
printf("%.2f ", cpu_output[i]);
}
printf("\n");
pthread_mutex_unlock(&mutex);
}
CHECK(paddle_matrix_destroy(prob));
CHECK(paddle_arguments_destroy(out_args));
CHECK(paddle_matrix_destroy(mat));
CHECK(paddle_arguments_destroy(in_args));
CHECK(paddle_gradient_machine_destroy(machine));
free(cpu_input);
free(cpu_output);
return NULL;
}
int main() {
// Initalize Paddle
char* argv[] = {"--use_gpu=True"};
CHECK(paddle_init(1, (char**)argv));
// Reading config binary file. It is generated by `convert_protobin.sh`
long size;
void* buf = read_config(CONFIG_BIN, &size);
// Create a gradient machine for inference.
paddle_gradient_machine machine;
CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
CHECK(paddle_gradient_machine_randomize_param(machine));
// Loading parameter. Uncomment the following line and change the directory.
// CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
// "./some_where_to_params"));
srand(time(0));
pthread_mutex_init(&mutex, NULL);
pthread_t threads[NUM_THREAD];
for (int i = 0; i < NUM_THREAD; ++i) {
paddle_gradient_machine thread_local_machine;
CHECK(paddle_gradient_machine_create_shared_param(
machine, buf, size, &thread_local_machine));
pthread_create(&threads[i], NULL, thread_main, thread_local_machine);
}
for (int i = 0; i < NUM_THREAD; ++i) {
pthread_join(threads[i], NULL);
}
pthread_mutex_destroy(&mutex);
return 0;
}