Add a c-api interface to initialize the thread environment of Paddle (#5773)
* Fix bug in MergeModel.cpp. * Add a C-API interface to initialize the thread environment of Paddle and add a GPU example. * Add some notes for paddle_init_thread and move the implementation of paddle_error_string into a .cpp file. * Add some comments; delete some entries in the make list.
parent
36fcc95cab
commit
00b64f6679
@ -0,0 +1,32 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include "error.h"
|
||||||
|
|
||||||
|
/*
 * Map a paddle_error code to a short human-readable description.
 * Returns a pointer to a static string literal; the caller must not
 * free or modify it. Any code without a known description (including
 * success) maps to the empty string.
 */
const char* paddle_error_string(paddle_error err) {
  if (err == kPD_NULLPTR) {
    return "nullptr error";
  }
  if (err == kPD_OUT_OF_RANGE) {
    return "out of range error";
  }
  if (err == kPD_PROTOBUF_ERROR) {
    return "protobuf error";
  }
  if (err == kPD_NOT_SUPPORTED) {
    return "not supported error";
  }
  if (err == kPD_UNDEFINED_ERROR) {
    return "undefined error";
  }
  return "";
}
|
@ -1,8 +1,29 @@
|
|||||||
# Build the multi-thread Paddle C-API inference examples.
# The CPU binary is always built; a GPU binary is added when CUDA is found.

# cmake_minimum_required must be called before project() so policy
# settings are established first.
cmake_minimum_required(VERSION 2.8)
project(multi_thread)

find_package(Threads)

# Locate the Paddle installation; it may be supplied either via
# -DPADDLE_ROOT=... or through the PADDLE_ROOT environment variable.
if(NOT PADDLE_ROOT)
  set(PADDLE_ROOT $ENV{PADDLE_ROOT} CACHE PATH "Paddle Path")
endif()
if(PADDLE_ROOT)
  include_directories(${PADDLE_ROOT}/include)
  link_directories(${PADDLE_ROOT}/lib)
endif()

# CPU example.
set(CPU_SRCS main.c)
add_executable(${PROJECT_NAME} ${CPU_SRCS})
set_property(TARGET ${PROJECT_NAME} PROPERTY C_STANDARD 99)
# Use the plain library name so CMake resolves it through link_directories
# instead of forwarding a raw -l flag to the linker.
target_link_libraries(${PROJECT_NAME}
                      paddle_capi_shared
                      ${CMAKE_THREAD_LIBS_INIT})

# GPU example, built only when a CUDA toolkit is available.
find_package(CUDA QUIET)
if(CUDA_FOUND)
  set(GPU_SRCS main_gpu.c)
  cuda_add_executable(${PROJECT_NAME}_gpu ${GPU_SRCS})
  set_property(TARGET ${PROJECT_NAME}_gpu PROPERTY C_STANDARD 99)
  target_link_libraries(${PROJECT_NAME}_gpu
                        paddle_capi_shared
                        ${CMAKE_THREAD_LIBS_INIT})
endif(CUDA_FOUND)
|
@ -0,0 +1,113 @@
|
|||||||
|
#include <paddle/capi.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "../common/common.h"
|
||||||
|
|
||||||
|
#define CONFIG_BIN "./trainer_config.bin"
|
||||||
|
#define NUM_THREAD 4
|
||||||
|
#define NUM_ITER 1000
|
||||||
|
|
||||||
|
pthread_mutex_t mutex;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @brief It is an simple inference example that runs multi-threads on a GPU.
|
||||||
|
* Each thread holds it own local gradient_machine but shares the same
|
||||||
|
* parameters.
|
||||||
|
* If you want to run on different GPUs, you need to launch
|
||||||
|
* multi-processes or set trainer_count > 1.
|
||||||
|
*/
|
||||||
|
void* thread_main(void* gm_ptr) {
|
||||||
|
// Initialize the thread environment of Paddle.
|
||||||
|
CHECK(paddle_init_thread());
|
||||||
|
|
||||||
|
paddle_gradient_machine machine = (paddle_gradient_machine)(gm_ptr);
|
||||||
|
// Create input arguments.
|
||||||
|
paddle_arguments in_args = paddle_arguments_create_none();
|
||||||
|
// Create input matrix.
|
||||||
|
paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
|
||||||
|
/* size */ 784,
|
||||||
|
/* useGPU */ true);
|
||||||
|
// Create output arguments.
|
||||||
|
paddle_arguments out_args = paddle_arguments_create_none();
|
||||||
|
// Create output matrix.
|
||||||
|
paddle_matrix prob = paddle_matrix_create_none();
|
||||||
|
|
||||||
|
// CPU buffer to cache the input and output.
|
||||||
|
paddle_real* cpu_input = (paddle_real*)malloc(784 * sizeof(paddle_real));
|
||||||
|
paddle_real* cpu_output = (paddle_real*)malloc(10 * sizeof(paddle_real));
|
||||||
|
for (int iter = 0; iter < NUM_ITER; ++iter) {
|
||||||
|
// There is only one input layer of this network.
|
||||||
|
CHECK(paddle_arguments_resize(in_args, 1));
|
||||||
|
CHECK(paddle_arguments_set_value(in_args, 0, mat));
|
||||||
|
|
||||||
|
for (int i = 0; i < 784; ++i) {
|
||||||
|
cpu_input[i] = rand() / ((float)RAND_MAX);
|
||||||
|
}
|
||||||
|
CHECK(paddle_matrix_set_value(mat, cpu_input));
|
||||||
|
|
||||||
|
CHECK(paddle_gradient_machine_forward(machine,
|
||||||
|
in_args,
|
||||||
|
out_args,
|
||||||
|
/* isTrain */ false));
|
||||||
|
|
||||||
|
CHECK(paddle_arguments_get_value(out_args, 0, prob));
|
||||||
|
CHECK(paddle_matrix_get_value(prob, cpu_output));
|
||||||
|
|
||||||
|
pthread_mutex_lock(&mutex);
|
||||||
|
printf("Prob: ");
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
printf("%.2f ", cpu_output[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
pthread_mutex_unlock(&mutex);
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK(paddle_matrix_destroy(prob));
|
||||||
|
CHECK(paddle_arguments_destroy(out_args));
|
||||||
|
CHECK(paddle_matrix_destroy(mat));
|
||||||
|
CHECK(paddle_arguments_destroy(in_args));
|
||||||
|
CHECK(paddle_gradient_machine_destroy(machine));
|
||||||
|
|
||||||
|
free(cpu_input);
|
||||||
|
free(cpu_output);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
// Initalize Paddle
|
||||||
|
char* argv[] = {"--use_gpu=True"};
|
||||||
|
CHECK(paddle_init(1, (char**)argv));
|
||||||
|
|
||||||
|
// Reading config binary file. It is generated by `convert_protobin.sh`
|
||||||
|
long size;
|
||||||
|
void* buf = read_config(CONFIG_BIN, &size);
|
||||||
|
|
||||||
|
// Create a gradient machine for inference.
|
||||||
|
paddle_gradient_machine machine;
|
||||||
|
CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
|
||||||
|
CHECK(paddle_gradient_machine_randomize_param(machine));
|
||||||
|
|
||||||
|
// Loading parameter. Uncomment the following line and change the directory.
|
||||||
|
// CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
|
||||||
|
// "./some_where_to_params"));
|
||||||
|
srand(time(0));
|
||||||
|
pthread_mutex_init(&mutex, NULL);
|
||||||
|
|
||||||
|
pthread_t threads[NUM_THREAD];
|
||||||
|
|
||||||
|
for (int i = 0; i < NUM_THREAD; ++i) {
|
||||||
|
paddle_gradient_machine thread_local_machine;
|
||||||
|
CHECK(paddle_gradient_machine_create_shared_param(
|
||||||
|
machine, buf, size, &thread_local_machine));
|
||||||
|
pthread_create(&threads[i], NULL, thread_main, thread_local_machine);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < NUM_THREAD; ++i) {
|
||||||
|
pthread_join(threads[i], NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_mutex_destroy(&mutex);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in new issue