From 52a1e0197bc88916a19c25d7c27ee007883d6b57 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 17 May 2017 13:10:38 +0800
Subject: [PATCH 1/6] Define cc_xxx to simplify cmake

---
 CMakeLists.txt                   |  1 +
 cmake/generic.cmake              | 77 ++++++++++++++++++++++++++++++++
 paddle/majel/CMakeLists.txt      | 41 +----------------
 paddle/majel/test/CMakeLists.txt | 13 ++----
 4 files changed, 82 insertions(+), 50 deletions(-)
 create mode 100644 cmake/generic.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc85f83b94..884afa962b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -92,6 +92,7 @@ include(external/swig)    # download, build, install swig
 include(external/warpctc)   # download, build, install warpctc
 include(external/any)       # download libn::any
 
+include(generic)            # simplify cmake module
 include(package)            # set paddle packages
 include(cpplint)            # set paddle c++ style
 include(ccache)             # set ccache for compilation
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
new file mode 100644
index 0000000000..076a9514ad
--- /dev/null
+++ b/cmake/generic.cmake
@@ -0,0 +1,77 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# To simplify the build process of PaddlePaddle, we define a couple of
+# fundamental abstractions, e.g., how to build libraries, binaries and
+# tests in C++, CUDA and Go.
+#
+# -------------------------------------------
+#     C++        CUDA C++       Go
+# -------------------------------------------
+# cc_library    nv_library   go_library
+# cc_binary     nv_binary    go_binary
+# cc_test       nv_test      go_test
+# -------------------------------------------
+
+# cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
+# cc_binary(tensor
+#   SRCS
+#   tensor.cc)
+function(cc_binary TARGET_NAME)
+  set(options OPTIONAL)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
+  add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
+endfunction(cc_binary)
+
+# cc_library parses tensor.cc and figures out that the target also depends on tensor.h.
+# cc_library(tensor
+#   SRCS
+#   tensor.cc
+#   DEPS
+#   variant)
+function(cc_library TARGET_NAME)
+  set(options OPTIONAL)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if (${cc_library_OPTIONAL} STREQUAL "STATIC")
+    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
+  else()
+    add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
+  endif()
+  add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
+endfunction(cc_library)
+
+# The dependency on target tensor implies that if any of
+# tensor{.h,.cc,_test.cc} is changed, tensor_test needs to be re-built.
+# cc_test(tensor_test
+#   SRCS
+#   tensor_test.cc
+#   DEPS
+#   tensor)
+function(cc_test TARGET_NAME)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  add_executable(${TARGET_NAME} ${cc_test_SRCS})
+  add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
+  add_test(${TARGET_NAME} ${TARGET_NAME})
+endfunction(cc_test)
diff --git a/paddle/majel/CMakeLists.txt b/paddle/majel/CMakeLists.txt
index d4977df118..d4bce38906 100644
--- a/paddle/majel/CMakeLists.txt
+++ b/paddle/majel/CMakeLists.txt
@@ -1,43 +1,4 @@
-cmake_minimum_required(VERSION 3.0)
-
-if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
-  # find #include
-  get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
-  include_directories(${PARENT_DIR})
-
-  # find cmake directory modules
-  get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
-  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
-
-  # enable boost
-  find_package(Boost REQUIRED)
-  if(NOT Boost_FOUND)
-    message(FATAL "Cannot find Boost library.")
-  endif()
-  include_directories(${Boost_INCLUDE_DIRS})
-
-  # enable c++11
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
-
-  # enable gtest
-  set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
-  set(WITH_TESTING ON)
-  include(external/gtest)
-else()
-  message("-- Found gtest (include: ${GTEST_INCLUDE_DIR}, library: ${GTEST_LIBRARIES})")
-endif()
-
-########################### Build Majel #############################
-set(MAJEL_CXX_FILES place.cc)
-set(MAJEL_CUDA_FILES "")
-
-if(CUDA_FOUND)
-  cuda_add_library(majel ${MAJEL_CUDA_FILES} ${MAJEL_CXX_FILES})
-else()
-  add_library(majel ${MAJEL_CXX_FILES})
-endif()
-add_dependencies(majel ${external_project_dependencies})
-#####################################################################
+cc_library(majel SRCS place.cc)
 
 if(WITH_TESTING)
   add_subdirectory(test)
diff --git a/paddle/majel/test/CMakeLists.txt b/paddle/majel/test/CMakeLists.txt
index 76327fdd70..6379a4d6e7 100644
--- a/paddle/majel/test/CMakeLists.txt
+++ b/paddle/majel/test/CMakeLists.txt
@@ -1,10 +1,3 @@
-file(GLOB_RECURSE ALL_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
-
-add_executable(majel_tests ${ALL_TEST_FILES})
-add_dependencies(majel_tests majel)
-target_link_libraries(majel_tests
-  ${GTEST_LIBRARIES}
-  ${GTEST_MAIN_LIBRARIES}
-  majel
-  )
-add_test(majel_tests majel_tests)
+cc_test(place_test
+  SRCS place_test.cc
+  DEPS majel)
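
The three cc_* rules above are the whole interface a C++ sub-project needs. As a usage sketch (the tensor, tensor_tool and variant names are only illustrative, they are not part of this patch), a directory-level CMakeLists.txt reduces to:

    cc_library(tensor SRCS tensor.cc DEPS variant)          # add_library + build-order dependency on variant
    cc_binary(tensor_tool SRCS tensor_tool.cc DEPS tensor)  # add_executable, links tensor
    cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)    # links gtest and registers with ctest via add_test

Note that cc_library only calls add_dependencies on its DEPS, so linking of library dependencies still happens in the consuming cc_binary/cc_test target.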
From 5409e5e3cc1617f93350f13636fe527bc4096c37 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 17 May 2017 13:36:06 +0800
Subject: [PATCH 2/6] add reference

---
 cmake/generic.cmake | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 076a9514ad..4001c91507 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -22,8 +22,12 @@
 # -------------------------------------------
 # cc_library    nv_library   go_library
 # cc_binary     nv_binary    go_binary
-# cc_test       nv_test      go_test
+# cc_test       nv_test      go_test
 # -------------------------------------------
+#
+# cmake_parse_arguments can help us to achieve this goal.
+# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
+
 
 # cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
 # cc_binary(tensor
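
For reference, this is roughly what the parsing step does inside each rule. A standalone sketch (the demo_rule name and the tensor/variant arguments are hypothetical, purely for illustration):

    include(CMakeParseArguments)   # provides cmake_parse_arguments before CMake 3.5; a no-op on newer CMake
    function(demo_rule TARGET_NAME)
      set(options OPTIONAL)
      set(oneValueArgs "")
      set(multiValueArgs SRCS DEPS)
      cmake_parse_arguments(demo_rule "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
      message(STATUS "target   = ${TARGET_NAME}")        # tensor
      message(STATUS "sources  = ${demo_rule_SRCS}")     # tensor.cc
      message(STATUS "deps     = ${demo_rule_DEPS}")     # variant
      message(STATUS "optional = ${demo_rule_OPTIONAL}") # FALSE unless OPTIONAL is passed
    endfunction(demo_rule)

    demo_rule(tensor SRCS tensor.cc DEPS variant)

cc_library, cc_binary and cc_test all split their arguments this way; only the prefix passed to cmake_parse_arguments differs.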
From ed5bcfe8e37973d3750ed9ed04452ff3d98ec211 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 17 May 2017 15:05:02 +0800
Subject: [PATCH 3/6] add cu_xx to simplify cmake

---
 cmake/generic.cmake              | 84 +++++++++++++++++++++++++-------
 paddle/majel/test/CMakeLists.txt |  4 ++
 paddle/majel/test/cuda_test.cu   | 56 +++++++++++++++++++++
 3 files changed, 126 insertions(+), 18 deletions(-)
 create mode 100644 paddle/majel/test/cuda_test.cu

diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 4001c91507..22a26d7c5b 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -28,21 +28,6 @@
 # cmake_parse_arguments can help us to achieve this goal.
 # https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
 
-
-# cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
-# cc_binary(tensor
-#   SRCS
-#   tensor.cc)
-function(cc_binary TARGET_NAME)
-  set(options OPTIONAL)
-  set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS)
-  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
-  add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
-  target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
-endfunction(cc_binary)
-
 # cc_library parses tensor.cc and figures out that the target also depends on tensor.h.
 # cc_library(tensor
 #   SRCS
 #   tensor.cc
 #   DEPS
 #   variant)
 function(cc_library TARGET_NAME)
   set(options OPTIONAL)
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS)
   cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  if (${cc_library_OPTIONAL} STREQUAL "STATIC")
-    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
-  else()
+  if (${cc_library_OPTIONAL} STREQUAL "SHARED")
     add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
+  else()
+    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
   endif()
   add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
 endfunction(cc_library)
 
+# cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
+# cc_binary(tensor
+#   SRCS
+#   tensor.cc)
+function(cc_binary TARGET_NAME)
+  set(options OPTIONAL)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
+  add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
+endfunction(cc_binary)
+
 # The dependency on target tensor implies that if any of
 # tensor{.h,.cc,_test.cc} is changed, tensor_test needs to be re-built.
 # cc_test(tensor_test
 #   SRCS
 #   tensor_test.cc
 #   DEPS
 #   tensor)
 function(cc_test TARGET_NAME)
   set(options "")
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS)
   cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
   add_executable(${TARGET_NAME} ${cc_test_SRCS})
   add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies})
   target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
   add_test(${TARGET_NAME} ${TARGET_NAME})
 endfunction(cc_test)
+
+# Suppose that ops.cu includes global functions that take Tensor as
+# their parameters, so ops depends on tensor. This implies that if
+# any of tensor.{h,cc}, ops.{h,cu} is changed, ops needs to be re-built.
+# nv_library(ops
+#   SRCS
+#   ops.cu
+#   DEPS
+#   tensor)
+function(nv_library TARGET_NAME)
+  set(options OPTIONAL)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if (${nv_library_OPTIONAL} STREQUAL "SHARED")
+    cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
+  else()
+    cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
+  endif()
+  add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
+endfunction(nv_library)
+
+function(nv_binary TARGET_NAME)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
+  add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
+endfunction(nv_binary)
+
+# The dependency on target ops implies that if any of
+# ops{.h,.cu,_test.cu} is changed, ops_test needs to be re-built.
+# nv_test(ops_test
+#   SRCS
+#   ops_test.cu
+#   DEPS
+#   ops)
+function(nv_test TARGET_NAME)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
+  add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
+  add_test(${TARGET_NAME} ${TARGET_NAME})
+endfunction(nv_test)
diff --git a/paddle/majel/test/CMakeLists.txt b/paddle/majel/test/CMakeLists.txt
index 6379a4d6e7..68f9059874 100644
--- a/paddle/majel/test/CMakeLists.txt
+++ b/paddle/majel/test/CMakeLists.txt
@@ -1,3 +1,7 @@
 cc_test(place_test
   SRCS place_test.cc
   DEPS majel)
+
+if(WITH_GPU)
+  nv_test(cuda_test SRCS cuda_test.cu)
+endif()
diff --git a/paddle/majel/test/cuda_test.cu b/paddle/majel/test/cuda_test.cu
new file mode 100644
index 0000000000..ebc9a2786e
--- /dev/null
+++ b/paddle/majel/test/cuda_test.cu
@@ -0,0 +1,56 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "gtest/gtest.h"
+
+#define CHECK_ERR(x) \
+  if (x != cudaSuccess) { \
+    fprintf(stderr,"%s in %s at line %d\n", \
+    cudaGetErrorString(err),__FILE__,__LINE__); \
+    exit(-1); \
+  } \
+
+__global__ void vecAdd (float* d_A, float* d_B, float* d_C, int n) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < n) {
+    d_C[i] = d_A[i] + d_B[i];
+  }
+}
+
+TEST(Cuda, Equality) {
+  int n = 10;
+  // Memory allocation for h_A, h_B and h_C (in the host)
+  float h_A[10] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0 };
+  float h_B[10] = { 0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0 };
+  float h_C[10];
+  float *d_A, *d_B, *d_C;
+
+  // Memory allocation for d_A, d_B and d_C (in the device)
+  err = cudaMalloc((void **) &d_A, sizeof(float)*n);
+  CHECK_ERR(err);
+
+  err =cudaMalloc((void **) &d_B, sizeof(float)*n);
+  CHECK_ERR(err);
+
+  err =cudaMalloc((void **) &d_C, sizeof(float)*n);
+  CHECK_ERR(err);
+
+  // Copying memory to device
+  err = cudaMemcpy(d_A, h_A, sizeof(float)*n, cudaMemcpyHostToDevice);
+  CHECK_ERR(err);
+
+  err = cudaMemcpy(d_B, h_B, sizeof(float)*n, cudaMemcpyHostToDevice);
+  CHECK_ERR(err);
+
+  // Calling the kernel
+  vecAdd<<>>(d_A,d_B,d_C,n);
+
+  // Copying results back to host
+  err = cudaMemcpy(h_C, d_C, sizeof(float)*n, cudaMemcpyDeviceToHost);
+  CHECK_ERR(err);
+
+  EXPECT_EQ(h_C[1], 1.0);
+  for (size_t i = 1; i < n - 1; ++i) {
+    EXPECT_EQ(h_C[i], 11.0);
+  }
+  EXPECT_EQ(h_C[0], 1.0);
+}
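
With the nv_* rules in place, GPU targets are declared the same way as C++ ones. A sketch of the intended combination (the ops and tensor targets are illustrative; only the cuda_test wiring above is actually part of this series):

    cc_library(tensor SRCS tensor.cc)              # ordinary C++ library
    if(WITH_GPU)
      nv_library(ops SRCS ops.cu DEPS tensor)      # compiled via cuda_add_library
      nv_test(ops_test SRCS ops_test.cu DEPS ops)  # cuda_add_executable + gtest + add_test
    endif()

Guarding nv_* calls with WITH_GPU, as paddle/majel/test/CMakeLists.txt does for cuda_test, keeps CPU-only builds working.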
From 26a73e48f976216374fc8a9294228673648a1d1d Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 17 May 2017 15:07:05 +0800
Subject: [PATCH 4/6] update code

---
 paddle/majel/test/cuda_test.cu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddle/majel/test/cuda_test.cu b/paddle/majel/test/cuda_test.cu
index ebc9a2786e..28cda19ef9 100644
--- a/paddle/majel/test/cuda_test.cu
+++ b/paddle/majel/test/cuda_test.cu
@@ -5,9 +5,9 @@
 #define CHECK_ERR(x) \
   if (x != cudaSuccess) { \
     fprintf(stderr,"%s in %s at line %d\n", \
-    cudaGetErrorString(err),__FILE__,__LINE__); \
-    exit(-1); \
-  } \
+            cudaGetErrorString(err),__FILE__,__LINE__); \
+            exit(-1); \
+  }
 
 __global__ void vecAdd (float* d_A, float* d_B, float* d_C, int n) {
   int i = blockDim.x * blockIdx.x + threadIdx.x;
   if (i < n) {
From d7ee421be952cdf13dc9c7bfc0ffe2181b39b5b7 Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 17 May 2017 15:29:26 +0800
Subject: [PATCH 5/6] add cuda unit test

---
 paddle/majel/test/cuda_test.cu | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/majel/test/cuda_test.cu b/paddle/majel/test/cuda_test.cu
index 28cda19ef9..360c254875 100644
--- a/paddle/majel/test/cuda_test.cu
+++ b/paddle/majel/test/cuda_test.cu
@@ -23,7 +23,7 @@ TEST(Cuda, Equality) {
   float h_B[10] = { 0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0 };
   float h_C[10];
   float *d_A, *d_B, *d_C;
-
+  cudaError_t err;
   // Memory allocation for d_A, d_B and d_C (in the device)
   err = cudaMalloc((void **) &d_A, sizeof(float)*n);
   CHECK_ERR(err);
 
   err =cudaMalloc((void **) &d_B, sizeof(float)*n);
   CHECK_ERR(err);
 
   err =cudaMalloc((void **) &d_C, sizeof(float)*n);
   CHECK_ERR(err);
 
   // Copying memory to device
   err = cudaMemcpy(d_A, h_A, sizeof(float)*n, cudaMemcpyHostToDevice);
   CHECK_ERR(err);
 
   err = cudaMemcpy(d_B, h_B, sizeof(float)*n, cudaMemcpyHostToDevice);
   CHECK_ERR(err);
 
   // Calling the kernel
   vecAdd<<>>(d_A,d_B,d_C,n);
 
   // Copying results back to host
   err = cudaMemcpy(h_C, d_C, sizeof(float)*n, cudaMemcpyDeviceToHost);
   CHECK_ERR(err);
 
-  EXPECT_EQ(h_C[1], 1.0);
-  for (size_t i = 1; i < n - 1; ++i) {
+  EXPECT_EQ(h_C[0], 1.0);
+  for (int i = 1; i < n - 1; ++i) {
     EXPECT_EQ(h_C[i], 11.0);
   }
-  EXPECT_EQ(h_C[0], 1.0);
+  EXPECT_EQ(h_C[9], 1.0);
 }
From 09c6ddcd635a939414a31dcb4609054c95a8401e Mon Sep 17 00:00:00 2001
From: liaogang
Date: Wed, 17 May 2017 17:15:46 +0800
Subject: [PATCH 6/6] clang-format cuda

---
 paddle/majel/test/cuda_test.cu | 39 ++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/paddle/majel/test/cuda_test.cu b/paddle/majel/test/cuda_test.cu
index 360c254875..4067dda2f1 100644
--- a/paddle/majel/test/cuda_test.cu
+++ b/paddle/majel/test/cuda_test.cu
@@ -1,15 +1,18 @@
-#include <stdlib.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include "gtest/gtest.h"
 
-#define CHECK_ERR(x) \
-  if (x != cudaSuccess) { \
-    fprintf(stderr,"%s in %s at line %d\n", \
-            cudaGetErrorString(err),__FILE__,__LINE__); \
-            exit(-1); \
+#define CHECK_ERR(x)                            \
+  if (x != cudaSuccess) {                       \
+    fprintf(stderr,                             \
+            "%s in %s at line %d\n",            \
+            cudaGetErrorString(err),            \
+            __FILE__,                           \
+            __LINE__);                          \
+    exit(-1);                                   \
   }
 
-__global__ void vecAdd (float* d_A, float* d_B, float* d_C, int n) {
+__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
   int i = blockDim.x * blockIdx.x + threadIdx.x;
   if (i < n) {
     d_C[i] = d_A[i] + d_B[i];
@@ -19,35 +22,35 @@
 TEST(Cuda, Equality) {
   int n = 10;
   // Memory allocation for h_A, h_B and h_C (in the host)
-  float h_A[10] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0 };
-  float h_B[10] = { 0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0 };
+  float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
+  float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
   float h_C[10];
   float *d_A, *d_B, *d_C;
   cudaError_t err;
   // Memory allocation for d_A, d_B and d_C (in the device)
-  err = cudaMalloc((void **) &d_A, sizeof(float)*n);
+  err = cudaMalloc((void **)&d_A, sizeof(float) * n);
   CHECK_ERR(err);
 
-  err =cudaMalloc((void **) &d_B, sizeof(float)*n);
+  err = cudaMalloc((void **)&d_B, sizeof(float) * n);
   CHECK_ERR(err);
 
-  err =cudaMalloc((void **) &d_C, sizeof(float)*n);
+  err = cudaMalloc((void **)&d_C, sizeof(float) * n);
   CHECK_ERR(err);
-
+
   // Copying memory to device
-  err = cudaMemcpy(d_A, h_A, sizeof(float)*n, cudaMemcpyHostToDevice);
+  err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
   CHECK_ERR(err);
 
-  err = cudaMemcpy(d_B, h_B, sizeof(float)*n, cudaMemcpyHostToDevice);
+  err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
   CHECK_ERR(err);
 
   // Calling the kernel
-  vecAdd<<>>(d_A,d_B,d_C,n);
+  vecAdd<<>>(d_A, d_B, d_C, n);
 
   // Copying results back to host
-  err = cudaMemcpy(h_C, d_C, sizeof(float)*n, cudaMemcpyDeviceToHost);
+  err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
   CHECK_ERR(err);
-
+
   EXPECT_EQ(h_C[0], 1.0);
   for (int i = 1; i < n - 1; ++i) {
     EXPECT_EQ(h_C[i], 11.0);