Merge pull request #2174 from gangliao/define_cmake
Define cc_xxx and nv_xxx to simplify cmake
commit
0a4b540a6a
@@ -0,0 +1,129 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To simplify the build process of PaddlePaddle, we defined a couple of
# fundamental abstractions, e.g., how to build a library, a binary and
# a test in C++, CUDA and Go.
#
# -------------------------------------------
#     C++          CUDA C++        Go
# -------------------------------------------
# cc_library     nv_library     go_library
# cc_binary      nv_binary      go_binary
# cc_test        nv_test        go_test
# -------------------------------------------
#
# cmake_parse_arguments can help us to achieve this goal.
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
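#
# For illustration: for a call like cc_library(tensor SRCS tensor.cc DEPS variant)
# (as in the example below), cmake_parse_arguments(cc_library ...) sets
#   cc_library_SRCS = tensor.cc
#   cc_library_DEPS = variant
# i.e., each keyword listed in multiValueArgs becomes a cc_library_<keyword> variable.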

# cc_library parses tensor.cc and figures out that the target also depends on tensor.h.
# cc_library(tensor
#   SRCS
#   tensor.cc
#   DEPS
#   variant)
function(cc_library TARGET_NAME)
  set(options OPTIONAL)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  if (${cc_library_OPTIONAL} STREQUAL "SHARED")
    add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
  else()
    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
  endif()
  add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
endfunction(cc_library)
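
# For the example call above, and absent a shared-library request, the body
# effectively reduces to (illustrative expansion only):
#   add_library(tensor STATIC tensor.cc)
#   add_dependencies(tensor variant ${external_project_dependencies})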

# cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
# cc_binary(tensor
#   SRCS
#   tensor.cc)
function(cc_binary TARGET_NAME)
  set(options OPTIONAL)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
  add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
endfunction(cc_binary)

# The dependency on target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test needs to be rebuilt.
# cc_test(tensor_test
#   SRCS
#   tensor_test.cc
#   DEPS
#   tensor)
function(cc_test TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  add_executable(${TARGET_NAME} ${cc_test_SRCS})
  add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
  add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(cc_test)
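
# Note: add_test registers the test executable with CTest, so a target defined
# via cc_test(tensor_test ...) can be run with, e.g., `ctest -R tensor_test`.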

# Suppose that ops.cu includes global functions that take Tensor as
# their parameters, so ops depends on tensor. This implies that if
# any of tensor.{h,cc}, ops.{h,cu} is changed, ops needs to be rebuilt.
# nv_library(ops
#   SRCS
#   ops.cu
#   DEPS
#   tensor)
function(nv_library TARGET_NAME)
  set(options OPTIONAL)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  if (${nv_library_OPTIONAL} STREQUAL "SHARED")
    cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
  else()
    cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
  endif()
  add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
endfunction(nv_library)
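
# Note: cuda_add_library and cuda_add_executable come from CMake's FindCUDA
# module; they are the CUDA-aware counterparts of add_library and
# add_executable, compiling .cu sources with nvcc before linking.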

function(nv_binary TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
  add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
endfunction(nv_binary)

# The dependency on target ops implies that if any of
# ops{.h,.cu,_test.cu} is changed, ops_test needs to be rebuilt.
# nv_test(ops_test
#   SRCS
#   ops_test.cu
#   DEPS
#   ops)
function(nv_test TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
  add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
  add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(nv_test)

@@ -1,10 +1,7 @@
-file(GLOB_RECURSE ALL_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
-
-add_executable(majel_tests ${ALL_TEST_FILES})
-add_dependencies(majel_tests majel)
-target_link_libraries(majel_tests
-  ${GTEST_LIBRARIES}
-  ${GTEST_MAIN_LIBRARIES}
-  majel
-)
-add_test(majel_tests majel_tests)
+cc_test(place_test
+  SRCS place_test.cc
+  DEPS majel)
+
+if(WITH_GPU)
+  nv_test(cuda_test SRCS cuda_test.cu)
+endif()
@@ -0,0 +1,59 @@
#include <cuda_runtime.h>
#include <stdio.h>
#include "gtest/gtest.h"
#define CHECK_ERR(x)                    \
  if (x != cudaSuccess) {               \
    fprintf(stderr,                     \
            "%s in %s at line %d\n",    \
            cudaGetErrorString(x),      \
            __FILE__,                   \
            __LINE__);                  \
    exit(-1);                           \
  }

// Element-wise vector addition: each thread computes one element of d_C.
__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < n) {
    d_C[i] = d_A[i] + d_B[i];
  }
}

TEST(Cuda, Equality) {
  int n = 10;
  // Memory allocation for h_A, h_B and h_C (in the host)
  float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
  float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
  float h_C[10];
  float *d_A, *d_B, *d_C;
  cudaError_t err;
  // Memory allocation for d_A, d_B and d_C (in the device)
  err = cudaMalloc((void **)&d_A, sizeof(float) * n);
  CHECK_ERR(err);

  err = cudaMalloc((void **)&d_B, sizeof(float) * n);
  CHECK_ERR(err);

  err = cudaMalloc((void **)&d_C, sizeof(float) * n);
  CHECK_ERR(err);

  // Copying memory to device
  err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
  CHECK_ERR(err);

  err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
  CHECK_ERR(err);

  // Calling the kernel
  vecAdd<<<ceil(n / 256.0), 256>>>(d_A, d_B, d_C, n);
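  // Launch ceil(n / 256.0) blocks of 256 threads each, enough to cover all n
  // elements; threads with index >= n fall through the kernel's bounds check.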

  // Copying results back to host
  err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
  CHECK_ERR(err);

  EXPECT_EQ(h_C[0], 1.0);
  for (int i = 1; i < n - 1; ++i) {
    EXPECT_EQ(h_C[i], 11.0);
  }
  EXPECT_EQ(h_C[9], 1.0);
}