Merge pull request #2174 from gangliao/define_cmake

Define cc_xxx and nv_xxx to simplify cmake

commit 0a4b540a6a
@@ -0,0 +1,129 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# To simplify the build process of PaddlePaddle, we define a couple of
# fundamental abstractions, e.g., how to build a library, a binary, and
# a test in C++, CUDA and Go.
#
# -------------------------------------------
#    C++          CUDA C++       Go
# -------------------------------------------
# cc_library     nv_library     go_library
# cc_binary      nv_binary      go_binary
# cc_test        nv_test        go_test
# -------------------------------------------
#
# cmake_parse_arguments helps us achieve this goal.
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
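#
# For illustration only: a call such as
#   cc_library(tensor SRCS tensor.cc DEPS variant)
# is split by cmake_parse_arguments into the variables
#   cc_library_SRCS = tensor.cc
#   cc_library_DEPS = variant
# and the functions below use these variables to create and wire up targets.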

# cc_library parses tensor.cc and figures out that the target also depends on tensor.h.
# cc_library(tensor
#   SRCS
#   tensor.cc
#   DEPS
#   variant)
function(cc_library TARGET_NAME)
  set(options OPTIONAL)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  if (${cc_library_OPTIONAL} STREQUAL "SHARED")
    add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
  else()
    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
  endif()
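  # add_dependencies only enforces build order; linking of DEPS is left to
  # the consumers (see target_link_libraries in cc_binary/cc_test below).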
  add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
endfunction(cc_library)

# cc_binary parses tensor.cc and figures out that the target also depends on tensor.h.
# cc_binary(tensor
#   SRCS
#   tensor.cc)
function(cc_binary TARGET_NAME)
  set(options OPTIONAL)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
  add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
endfunction(cc_binary)

# The dependency on target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test needs to be re-built.
# cc_test(tensor_test
#   SRCS
#   tensor_test.cc
#   DEPS
#   tensor)
function(cc_test TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  add_executable(${TARGET_NAME} ${cc_test_SRCS})
  add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
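  # Register the executable with CTest so that it is picked up by `make test` / `ctest`.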
  add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(cc_test)

# Suppose that ops.cu includes global functions that take Tensor as
# their parameters, so ops depends on tensor. This implies that if
# any of tensor.{h,cc}, ops.{h,cu} is changed, ops needs to be re-built.
# nv_library(ops
#   SRCS
#   ops.cu
#   DEPS
#   tensor)
function(nv_library TARGET_NAME)
  set(options OPTIONAL)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  if (${nv_library_OPTIONAL} STREQUAL "SHARED")
    cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
  else()
    cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
  endif()
  add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
endfunction(nv_library)

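# nv_binary builds an executable from CUDA sources, mirroring cc_binary.
# Illustrative usage (the target and file names here are only examples):
# nv_binary(ops_bin
#   SRCS
#   ops_bin.cu)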
function(nv_binary TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
  add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
endfunction(nv_binary)

# The dependency on target ops implies that if any of
# ops{.h,.cu,_test.cu} is changed, ops_test needs to be re-built.
# nv_test(ops_test
#   SRCS
#   ops_test.cu
#   DEPS
#   ops)
function(nv_test TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
  add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
  target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
  add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(nv_test)
@@ -1,10 +1,7 @@
-file(GLOB_RECURSE ALL_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
+cc_test(place_test
+    SRCS place_test.cc
+    DEPS majel)
 
-add_executable(majel_tests ${ALL_TEST_FILES})
-add_dependencies(majel_tests majel)
-target_link_libraries(majel_tests
-                      ${GTEST_LIBRARIES}
-                      ${GTEST_MAIN_LIBRARIES}
-                      majel
-                     )
-add_test(majel_tests majel_tests)
+if(WITH_GPU)
+    nv_test(cuda_test SRCS cuda_test.cu)
+endif()
@@ -0,0 +1,59 @@
#include <cuda_runtime.h>
#include <stdio.h>
#include "gtest/gtest.h"

#define CHECK_ERR(x)                 \
  if (x != cudaSuccess) {            \
    fprintf(stderr,                  \
            "%s in %s at line %d\n", \
            cudaGetErrorString(x),   \
            __FILE__,                \
            __LINE__);               \
    exit(-1);                        \
  }

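// Element-wise vector addition: each thread handles one index i of d_C = d_A + d_B.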
__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
  int i = blockDim.x * blockIdx.x + threadIdx.x;
  if (i < n) {
    d_C[i] = d_A[i] + d_B[i];
  }
}

TEST(Cuda, Equality) {
  int n = 10;
  // Memory allocation for h_A, h_B and h_C (in the host)
  float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
  float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
  float h_C[10];
  float *d_A, *d_B, *d_C;
  cudaError_t err;
  // Memory allocation for d_A, d_B and d_C (in the device)
  err = cudaMalloc((void **)&d_A, sizeof(float) * n);
  CHECK_ERR(err);

  err = cudaMalloc((void **)&d_B, sizeof(float) * n);
  CHECK_ERR(err);

  err = cudaMalloc((void **)&d_C, sizeof(float) * n);
  CHECK_ERR(err);

  // Copying memory to device
  err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
  CHECK_ERR(err);

  err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
  CHECK_ERR(err);

  // Calling the kernel
  vecAdd<<<ceil(n / 256.0), 256>>>(d_A, d_B, d_C, n);
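  // ceil(n / 256.0) blocks of 256 threads each cover all n elements; the
  // cudaMemcpy below runs on the default stream, so it waits for the kernel
  // to finish before copying the results back.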

  // Copying results back to host
  err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
  CHECK_ERR(err);

  EXPECT_EQ(h_C[0], 1.0);
  for (int i = 1; i < n - 1; ++i) {
    EXPECT_EQ(h_C[i], 11.0);
  }
  EXPECT_EQ(h_C[9], 1.0);
}